Skip to content

Commit 3ff9ba0

Browse files
mozga-intelluotao1
authored and committed
Mkldnn layout (#11040)
* Add MKLDNN layout support in Paddle Add MKLDNN layout in Paddle so that an MKLDNN-friendly memory layout can be used in MKLDNN-enabled OP kernels. Before this commit, NCHW was hardcoded in all MKLDNN op kernels. As a result, a non-optimized execution path was selected in the MKLDNN primitive, which brings worse performance. Besides the framework change, three MKLDNN OP kernels were updated to use the new MKLDNN layout: conv/pool2d/batch_norm. Other MKLDNN OP kernels also need to be updated in a similar way to achieve the best performance. * Add MKLDNN layout support in activation OP * Don't populate layout from input to output when kMKLDNN in * Refine pool mkldnn op kernel * MKLDNN layout * Remove the inheritance from tensor file * MKLDNN layout: refactoring * Remove additional #define to register new operator * Prepare mkldnn tests to work with layout
1 parent a1e046b commit 3ff9ba0

18 files changed

+287
-43
lines changed

paddle/fluid/framework/data_layout.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ enum class DataLayout {
2727
kNHWC = 0,
2828
kNCHW = 1,
2929
kAnyLayout = 2,
30+
kMKLDNN = 3, // all layouts supported by MKLDNN internally
3031
};
3132

3233
inline DataLayout StringToDataLayout(const std::string& str) {
@@ -41,6 +42,8 @@ inline DataLayout StringToDataLayout(const std::string& str) {
4142
return DataLayout::kNCHW;
4243
} else if (s == "ANYLAYOUT") {
4344
return DataLayout::kAnyLayout;
45+
} else if (s == "MKLDNNLAYOUT") {
46+
return DataLayout::kMKLDNN;
4447
} else {
4548
PADDLE_THROW("Unknown storage order string: %s", s);
4649
}
@@ -54,8 +57,10 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
5457
return "NCHW";
5558
case DataLayout::kAnyLayout:
5659
return "ANY_LAYOUT";
60+
case DataLayout::kMKLDNN:
61+
return "MKLDNNLAYOUT";
5762
default:
58-
PADDLE_THROW("unknown DataLayou %d", data_layout);
63+
PADDLE_THROW("unknown DataLayout %d", data_layout);
5964
}
6065
}
6166

paddle/fluid/framework/data_layout_transform.cc

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#include <vector>
1717

1818
#include "paddle/fluid/operators/math/math_function.h"
19+
#ifdef PADDLE_WITH_MKLDNN
20+
#include "paddle/fluid/platform/mkldnn_helper.h"
21+
#endif
1922

2023
namespace paddle {
2124
namespace framework {
@@ -88,5 +91,85 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
8891
out->set_layout(expected_kernel_type.data_layout_);
8992
}
9093

94+
#ifdef PADDLE_WITH_MKLDNN
95+
using mkldnn::memory;
96+
using mkldnn::primitive;
97+
using mkldnn::reorder;
98+
99+
// Return a type-erased pointer to the tensor's underlying buffer, dispatching
// on the MKLDNN data type so the correctly-typed Tensor::data<T>() accessor
// is invoked (data<T>() checks that T matches the tensor's element type).
// Throws via PADDLE_THROW for MKLDNN types with no Paddle counterpart.
void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) {
  switch (type) {
    case mkldnn::memory::data_type::f32:
      return platform::to_void_cast(tensor.data<float>());
    case mkldnn::memory::data_type::s8:
      // s8 maps onto plain char here — assumes char is signed on the target
      // platforms; NOTE(review): verify, as char signedness is impl-defined.
      return platform::to_void_cast(tensor.data<char>());
    case mkldnn::memory::data_type::u8:
      return platform::to_void_cast(tensor.data<unsigned char>());
    case mkldnn::memory::data_type::s16:
      return platform::to_void_cast(tensor.data<int16_t>());
    case mkldnn::memory::data_type::s32:
      return platform::to_void_cast(tensor.data<int32_t>());
    default:
      PADDLE_THROW("wrong mkldnn type provided");
  }
}
115+
#endif
116+
117+
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
118+
const OpKernelType& expected_kernel_type,
119+
const Tensor& in, Tensor* out) {
120+
auto in_layout = kernel_type_for_var.data_layout_;
121+
auto out_layout = expected_kernel_type.data_layout_;
122+
123+
PADDLE_ENFORCE(
124+
in_layout == DataLayout::kMKLDNN && out_layout != DataLayout::kMKLDNN,
125+
"TransDataLayoutFromMKLDNN only supports transform from MKLDNN to "
126+
"non-MKLDNN");
127+
128+
#ifdef PADDLE_WITH_MKLDNN
129+
PADDLE_ENFORCE(in.format() != memory::format::format_undef &&
130+
in.format() != memory::format::any,
131+
"Input tensor should have specified memory format");
132+
133+
// Set default as NCHW in case not specified
134+
out_layout =
135+
out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout;
136+
137+
auto& pool = platform::DeviceContextPool::Instance();
138+
auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(
139+
pool.Get(expected_kernel_type.place_));
140+
auto& cpu_engine = dev_ctx->GetEngine();
141+
142+
std::vector<int> in_tz = paddle::framework::vectorize2int(in.dims());
143+
std::vector<int> out_tz = in_tz;
144+
145+
memory::data_type in_type = ToMKLDNNDataType(in.type());
146+
PADDLE_ENFORCE(in_type != memory::data_type::data_undef,
147+
"Input tensor type is not supported: ", in.type().name());
148+
memory::data_type out_type = in_type;
149+
150+
memory::format in_format =
151+
in_tz.size() == 2 ? memory::format::nc : in.format();
152+
memory::format out_format =
153+
out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout);
154+
155+
void* in_data = GetDataFromTensor(in, in_type);
156+
157+
// output tensor has the same dims as input. Reorder don't change dims
158+
out->Resize(in.dims());
159+
160+
auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
161+
162+
auto in_memory = memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
163+
auto out_memory =
164+
memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
165+
166+
platform::Reorder(in_memory, out_memory);
167+
168+
out->set_layout(out_layout);
169+
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
170+
out->set_format(memory::format::format_undef);
171+
#endif
172+
}
173+
91174
} // namespace framework
92175
} // namespace paddle

paddle/fluid/framework/data_layout_transform.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#pragma once
1616

17+
#include <map>
1718
#include <vector>
1819
#include "paddle/fluid/framework/op_kernel_type.h"
1920
#include "paddle/fluid/framework/tensor.h"
@@ -22,6 +23,50 @@
2223
namespace paddle {
2324
namespace framework {
2425

26+
#ifdef PADDLE_WITH_MKLDNN
27+
using MKLDNNFormat = mkldnn::memory::format;
28+
using MKLDNNDataType = mkldnn::memory::data_type;
29+
30+
// Map a Paddle layout onto the equivalent MKLDNN memory format.
// Only kNHWC and kNCHW are representable; any other layout throws.
inline MKLDNNFormat ToMKLDNNFormat(const DataLayout& layout) {
  if (layout == DataLayout::kNHWC) {
    return MKLDNNFormat::nhwc;
  }
  if (layout == DataLayout::kNCHW) {
    return MKLDNNFormat::nchw;
  }
  PADDLE_THROW("Fail to convert layout %s to MKLDNN format",
               DataLayoutToString(layout));
}
41+
42+
// Map an MKLDNN memory format back onto the equivalent Paddle layout.
// Only nhwc and nchw have a Paddle counterpart; anything else throws.
inline DataLayout ToPaddleLayout(const MKLDNNFormat& format) {
  if (format == MKLDNNFormat::nhwc) {
    return DataLayout::kNHWC;
  }
  if (format == MKLDNNFormat::nchw) {
    return DataLayout::kNCHW;
  }
  PADDLE_THROW("Fail to convert MKLDNN format to paddle layout");
}
52+
53+
// Translate a C++ type (keyed by std::type_index) into the matching MKLDNN
// data type. Returns data_undef for types MKLDNN cannot represent.
inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
  static const std::map<std::type_index, MKLDNNDataType> dict{
      {std::type_index(typeid(float)), MKLDNNDataType::f32},  // NOLINT
      {std::type_index(typeid(char)), MKLDNNDataType::s8},    // NOLINT
      {std::type_index(typeid(unsigned char)), MKLDNNDataType::u8},
      {std::type_index(typeid(int16_t)), MKLDNNDataType::s16},
      {std::type_index(typeid(int32_t)), MKLDNNDataType::s32}};
  const auto iter = dict.find(type);
  return iter == dict.end() ? MKLDNNDataType::data_undef : iter->second;
}
64+
#endif
65+
66+
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
67+
const OpKernelType& expected_kernel_type,
68+
const Tensor& in, Tensor* out);
69+
2570
std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to);
2671

2772
void TransDataLayout(const OpKernelType& kernel_type_for_var,

paddle/fluid/framework/data_transform.cc

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,34 @@ void DataTransform(const OpKernelType& expected_kernel_type,
3333
Tensor in;
3434
in.ShareDataWith(input_tensor);
3535
Tensor out;
36+
DataLayout lin = kernel_type_for_var.data_layout_;
37+
DataLayout lout = expected_kernel_type.data_layout_;
3638

3739
// do layout transform
38-
if (NeedTransformLayout(expected_kernel_type.data_layout_,
39-
kernel_type_for_var.data_layout_)) {
40-
TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
40+
if (NeedTransformLayout(lout, lin)) {
41+
if (lin == DataLayout::kMKLDNN || lout == DataLayout::kMKLDNN) {
42+
PADDLE_ENFORCE(
43+
!(lin == DataLayout::kMKLDNN && lout == DataLayout::kMKLDNN),
44+
"No layout transform needed between two MKLDNN OPKernels");
45+
46+
if (lin != DataLayout::kMKLDNN && lout == DataLayout::kMKLDNN) {
47+
#ifdef PADDLE_WITH_MKLDNN
48+
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
49+
// Just set layout/format. No real transform occur
50+
out.ShareDataWith(input_tensor);
51+
out.set_layout(DataLayout::kMKLDNN);
52+
out.set_format(ToMKLDNNFormat(lin));
53+
#endif
54+
} else {
55+
// Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
56+
// Do transform via MKLDNN lib
57+
TransDataLayoutFromMKLDNN(kernel_type_for_var, expected_kernel_type, in,
58+
&out);
59+
}
60+
} else {
61+
// Case3 - transform between Non-MKLDNN OPKernels
62+
TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
63+
}
4164
transformed = true;
4265
PassTensorData(&out, &in);
4366
}

paddle/fluid/framework/op_kernel_type.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,14 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
8787
}
8888

8989
inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) {
90-
return l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r;
90+
bool ret =
91+
(l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r);
92+
#ifdef PADDLE_WITH_MKLDNN
93+
// Layout transform needed for either non-MKLDNN to MKLDNN or vice versa
94+
ret |= (l != DataLayout::kMKLDNN && r == DataLayout::kMKLDNN);
95+
ret |= (l == DataLayout::kMKLDNN && r != DataLayout::kMKLDNN);
96+
#endif
97+
return ret;
9198
}
9299

93100
inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) {

paddle/fluid/framework/op_registry.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,14 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
8383

8484
void operator()(const char* op_type, const char* library_type) const {
8585
using T = typename KERNEL_TYPE::ELEMENT_TYPE;
86+
std::string library(library_type);
87+
std::string data_layout = "ANYLAYOUT";
88+
if (library == "MKLDNN") {
89+
data_layout = "MKLDNNLAYOUT";
90+
}
8691
OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(),
87-
DataLayout::kAnyLayout, StringToLibraryType(library_type));
92+
StringToDataLayout(data_layout),
93+
StringToLibraryType(library_type));
8894
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE);
8995

9096
constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
@@ -99,7 +105,8 @@ struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> {
99105
void operator()(const char* op_type, const char* library_type) const {}
100106
};
101107

102-
// User can register many kernel in one place. The data type could be different.
108+
// User can register many kernel in one place. The data type could be
109+
// different.
103110
template <typename PlaceType, typename... KernelType>
104111
class OpKernelRegistrar : public Registrar {
105112
public:

paddle/fluid/framework/operator.cc

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -444,10 +444,25 @@ class RuntimeInferShapeContext : public InferShapeContext {
444444
auto* out_tensor = out_var->GetMutable<LoDTensor>();
445445
out_tensor->set_lod(in_tensor.lod());
446446

447-
// TODO(dzhwinter) : reuse ShareLoD in most operators.
448-
// Need to call ShareLayout explicitly in sequence related ops.
449-
// Shall we have a better method to shared info between in/out Tensor?
450-
out_tensor->set_layout(in_tensor.layout());
447+
// TODO(dzhwinter) : reuse ShareLoD in most operators.
448+
// Need to call ShareLayout explicitly in sequence related ops.
449+
// Shall we have a better method to shared info between in/out Tensor?
450+
#ifdef PADDLE_WITH_MKLDNN
451+
// Fix me: ugly workaround below
452+
// Correct solution:
453+
// set_layout() should NOT be called here (i.e. ShareLoD). Instead,
454+
// layout of output tensor should be set "manually" in Compute()
455+
// of each OPKernel. The reason layout should NOT be shared between
456+
// input and output "automatically" (now by InferShape()->ShareLoD())
457+
// is that layout transform may occur after InferShape().
458+
// Workaround:
459+
// Skip set_layout() when input layout is kMKLDNN
460+
// This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN
461+
// OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called
462+
// in Compute()
463+
if (in_tensor.layout() != DataLayout::kMKLDNN)
464+
#endif
465+
out_tensor->set_layout(in_tensor.layout());
451466
}
452467

453468
void ShareLayout(const std::string& in, const std::string& out, size_t i = 0,
@@ -665,7 +680,8 @@ OpKernelType OperatorWithKernel::GetExpectedKernelType(
665680
OpKernelType OperatorWithKernel::GetKernelTypeForVar(
666681
const std::string& var_name, const Tensor& tensor,
667682
const OpKernelType& expected_kernel_type) const {
668-
return OpKernelType(expected_kernel_type.data_type_, tensor.place());
683+
return OpKernelType(expected_kernel_type.data_type_, tensor.place(),
684+
tensor.layout());
669685
}
670686

671687
} // namespace framework

paddle/fluid/framework/tensor.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,28 @@ namespace framework {
3434
class LoDTensor;
3535

3636
class Tensor {
37+
#ifdef PADDLE_WITH_MKLDNN
38+
39+
public:
40+
inline mkldnn::memory::format format() const { return format_; }
41+
42+
inline void set_format(const mkldnn::memory::format format) {
43+
format_ = format;
44+
}
45+
46+
protected:
47+
/**
48+
* @brief the detail format of memory block which have layout as kMKLDNN
49+
*
50+
* @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C,
51+
* nChw16c, etc. For a MKLDNN memory block, layout will be set as
52+
* DataLayout::kMKLDNN meanwhile detail memory format will be kept in
53+
* this field.
54+
*/
55+
56+
mkldnn::memory::format format_ = mkldnn::memory::format::format_undef;
57+
#endif
58+
3759
public:
3860
template <typename T, size_t D, int MajorType, typename IndexType>
3961
friend struct EigenTensor;
@@ -195,8 +217,10 @@ class Tensor {
195217
* N,C,H,W for respectively the batch size, the number of
196218
* feature maps, the height.
197219
*/
198-
199-
DataLayout layout_ = DataLayout::kNHWC;
220+
// Fix me: here just change the default layout to kNCHW
221+
// it doesn't fix the real issue, i.e. feeder should set up tensor layout
222+
// according to actual input data
223+
DataLayout layout_ = DataLayout::kNCHW;
200224

201225
/**
202226
* @brief A PlaceHolder may be shared by more than one tensor.

paddle/fluid/framework/tensor_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ TEST(Tensor, ReshapeToMatrix) {
209209

210210
// A freshly-constructed Tensor defaults to kNCHW (changed from kNHWC by
// this commit), and set_layout() must override that default.
TEST(Tensor, Layout) {
  framework::Tensor src;
  ASSERT_EQ(src.layout(), framework::DataLayout::kNCHW);
  src.set_layout(framework::DataLayout::kAnyLayout);
  ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout);
}

paddle/fluid/operators/activation_op.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
5858
const framework::OperatorWithKernel& oper,
5959
const std::string& name) {
6060
framework::LibraryType library{framework::LibraryType::kPlain};
61+
62+
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
6163
#ifdef PADDLE_WITH_MKLDNN
6264
auto it = oper.Attrs().find("use_mkldnn");
6365
if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() &&
6466
platform::CanMKLDNNBeUsed(ctx)) {
6567
library = framework::LibraryType::kMKLDNN;
68+
layout = framework::DataLayout::kMKLDNN;
6669
}
6770
#endif
68-
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
6971
return framework::OpKernelType(
7072
framework::ToDataType(ctx.Input<framework::Tensor>(name)->type()),
7173
ctx.GetPlace(), layout, library);

0 commit comments

Comments
 (0)