Skip to content

Commit 35e820d

Browse files
committed
Merge remote-tracking branch 'ups/develop' into infer_api
2 parents aad8f4d + 3ff9ba0 commit 35e820d

29 files changed

+476
-149
lines changed

cmake/configure.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ if(WITH_GPU)
9292
if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
9393
message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile")
9494
endif()
95+
if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4)
96+
message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile")
97+
endif()
9598
include_directories(${TENSORRT_INCLUDE_DIR})
9699
endif()
97100
elseif(WITH_AMD_GPU)

paddle/fluid/framework/data_layout.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ enum class DataLayout {
2727
kNHWC = 0,
2828
kNCHW = 1,
2929
kAnyLayout = 2,
30+
kMKLDNN = 3, // all layouts supported by MKLDNN internally
3031
};
3132

3233
inline DataLayout StringToDataLayout(const std::string& str) {
@@ -41,6 +42,8 @@ inline DataLayout StringToDataLayout(const std::string& str) {
4142
return DataLayout::kNCHW;
4243
} else if (s == "ANYLAYOUT") {
4344
return DataLayout::kAnyLayout;
45+
} else if (s == "MKLDNNLAYOUT") {
46+
return DataLayout::kMKLDNN;
4447
} else {
4548
PADDLE_THROW("Unknown storage order string: %s", s);
4649
}
@@ -54,8 +57,10 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
5457
return "NCHW";
5558
case DataLayout::kAnyLayout:
5659
return "ANY_LAYOUT";
60+
case DataLayout::kMKLDNN:
61+
return "MKLDNNLAYOUT";
5762
default:
58-
PADDLE_THROW("unknown DataLayou %d", data_layout);
63+
PADDLE_THROW("unknown DataLayout %d", data_layout);
5964
}
6065
}
6166

paddle/fluid/framework/data_layout_transform.cc

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#include <vector>
1717

1818
#include "paddle/fluid/operators/math/math_function.h"
19+
#ifdef PADDLE_WITH_MKLDNN
20+
#include "paddle/fluid/platform/mkldnn_helper.h"
21+
#endif
1922

2023
namespace paddle {
2124
namespace framework {
@@ -88,5 +91,85 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
8891
out->set_layout(expected_kernel_type.data_layout_);
8992
}
9093

94+
#ifdef PADDLE_WITH_MKLDNN
95+
using mkldnn::memory;
96+
using mkldnn::primitive;
97+
using mkldnn::reorder;
98+
99+
void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) {
100+
switch (type) {
101+
case mkldnn::memory::data_type::f32:
102+
return platform::to_void_cast(tensor.data<float>());
103+
case mkldnn::memory::data_type::s8:
104+
return platform::to_void_cast(tensor.data<char>());
105+
case mkldnn::memory::data_type::u8:
106+
return platform::to_void_cast(tensor.data<unsigned char>());
107+
case mkldnn::memory::data_type::s16:
108+
return platform::to_void_cast(tensor.data<int16_t>());
109+
case mkldnn::memory::data_type::s32:
110+
return platform::to_void_cast(tensor.data<int32_t>());
111+
default:
112+
PADDLE_THROW("wrong mkldnn type provided");
113+
}
114+
}
115+
#endif
116+
117+
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
118+
const OpKernelType& expected_kernel_type,
119+
const Tensor& in, Tensor* out) {
120+
auto in_layout = kernel_type_for_var.data_layout_;
121+
auto out_layout = expected_kernel_type.data_layout_;
122+
123+
PADDLE_ENFORCE(
124+
in_layout == DataLayout::kMKLDNN && out_layout != DataLayout::kMKLDNN,
125+
"TransDataLayoutFromMKLDNN only supports transform from MKLDNN to "
126+
"non-MKLDNN");
127+
128+
#ifdef PADDLE_WITH_MKLDNN
129+
PADDLE_ENFORCE(in.format() != memory::format::format_undef &&
130+
in.format() != memory::format::any,
131+
"Input tensor should have specified memory format");
132+
133+
// Set default as NCHW in case not specified
134+
out_layout =
135+
out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout;
136+
137+
auto& pool = platform::DeviceContextPool::Instance();
138+
auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(
139+
pool.Get(expected_kernel_type.place_));
140+
auto& cpu_engine = dev_ctx->GetEngine();
141+
142+
std::vector<int> in_tz = paddle::framework::vectorize2int(in.dims());
143+
std::vector<int> out_tz = in_tz;
144+
145+
memory::data_type in_type = ToMKLDNNDataType(in.type());
146+
PADDLE_ENFORCE(in_type != memory::data_type::data_undef,
147+
"Input tensor type is not supported: ", in.type().name());
148+
memory::data_type out_type = in_type;
149+
150+
memory::format in_format =
151+
in_tz.size() == 2 ? memory::format::nc : in.format();
152+
memory::format out_format =
153+
out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout);
154+
155+
void* in_data = GetDataFromTensor(in, in_type);
156+
157+
// output tensor has the same dims as input. Reorder don't change dims
158+
out->Resize(in.dims());
159+
160+
auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
161+
162+
auto in_memory = memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
163+
auto out_memory =
164+
memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
165+
166+
platform::Reorder(in_memory, out_memory);
167+
168+
out->set_layout(out_layout);
169+
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
170+
out->set_format(memory::format::format_undef);
171+
#endif
172+
}
173+
91174
} // namespace framework
92175
} // namespace paddle

paddle/fluid/framework/data_layout_transform.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#pragma once
1616

17+
#include <map>
1718
#include <vector>
1819
#include "paddle/fluid/framework/op_kernel_type.h"
1920
#include "paddle/fluid/framework/tensor.h"
@@ -22,6 +23,50 @@
2223
namespace paddle {
2324
namespace framework {
2425

26+
#ifdef PADDLE_WITH_MKLDNN
27+
using MKLDNNFormat = mkldnn::memory::format;
28+
using MKLDNNDataType = mkldnn::memory::data_type;
29+
30+
// Convert a Paddle DataLayout into the equivalent MKLDNN memory format.
// Only NHWC and NCHW have MKLDNN counterparts; anything else throws.
inline MKLDNNFormat ToMKLDNNFormat(const DataLayout& layout) {
  if (layout == DataLayout::kNHWC) {
    return MKLDNNFormat::nhwc;
  }
  if (layout == DataLayout::kNCHW) {
    return MKLDNNFormat::nchw;
  }
  PADDLE_THROW("Fail to convert layout %s to MKLDNN format",
               DataLayoutToString(layout));
}
41+
42+
// Inverse of ToMKLDNNFormat: map an MKLDNN memory format back to a Paddle
// DataLayout. Only nhwc/nchw are representable; anything else throws.
inline DataLayout ToPaddleLayout(const MKLDNNFormat& format) {
  if (format == MKLDNNFormat::nhwc) {
    return DataLayout::kNHWC;
  }
  if (format == MKLDNNFormat::nchw) {
    return DataLayout::kNCHW;
  }
  PADDLE_THROW("Fail to convert MKLDNN format to paddle layout");
}
52+
53+
// Map a C++ element type to its MKLDNN data type, or data_undef when the
// type has no MKLDNN counterpart.
inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
  // Lookup table built once on first use.
  static const std::map<std::type_index, MKLDNNDataType> dict{
      {std::type_index(typeid(float)), MKLDNNDataType::f32},  // NOLINT
      {std::type_index(typeid(char)), MKLDNNDataType::s8},    // NOLINT
      {std::type_index(typeid(unsigned char)), MKLDNNDataType::u8},
      {std::type_index(typeid(int16_t)), MKLDNNDataType::s16},
      {std::type_index(typeid(int32_t)), MKLDNNDataType::s32}};
  const auto iter = dict.find(type);
  return iter == dict.end() ? MKLDNNDataType::data_undef : iter->second;
}
64+
#endif
65+
66+
void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
67+
const OpKernelType& expected_kernel_type,
68+
const Tensor& in, Tensor* out);
69+
2570
std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to);
2671

2772
void TransDataLayout(const OpKernelType& kernel_type_for_var,

paddle/fluid/framework/data_transform.cc

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,34 @@ void DataTransform(const OpKernelType& expected_kernel_type,
3333
Tensor in;
3434
in.ShareDataWith(input_tensor);
3535
Tensor out;
36+
DataLayout lin = kernel_type_for_var.data_layout_;
37+
DataLayout lout = expected_kernel_type.data_layout_;
3638

3739
// do layout transform
38-
if (NeedTransformLayout(expected_kernel_type.data_layout_,
39-
kernel_type_for_var.data_layout_)) {
40-
TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
40+
if (NeedTransformLayout(lout, lin)) {
41+
if (lin == DataLayout::kMKLDNN || lout == DataLayout::kMKLDNN) {
42+
PADDLE_ENFORCE(
43+
!(lin == DataLayout::kMKLDNN && lout == DataLayout::kMKLDNN),
44+
"No layout transform needed between two MKLDNN OPKernels");
45+
46+
if (lin != DataLayout::kMKLDNN && lout == DataLayout::kMKLDNN) {
47+
#ifdef PADDLE_WITH_MKLDNN
48+
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
49+
// Just set layout/format. No real transform occur
50+
out.ShareDataWith(input_tensor);
51+
out.set_layout(DataLayout::kMKLDNN);
52+
out.set_format(ToMKLDNNFormat(lin));
53+
#endif
54+
} else {
55+
// Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
56+
// Do transform via MKLDNN lib
57+
TransDataLayoutFromMKLDNN(kernel_type_for_var, expected_kernel_type, in,
58+
&out);
59+
}
60+
} else {
61+
// Case3 - transform between Non-MKLDNN OPKernels
62+
TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
63+
}
4164
transformed = true;
4265
PassTensorData(&out, &in);
4366
}

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_ro
1313

1414
if(WITH_GPU)
1515
nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
16-
dynload_cuda)
16+
dynload_cuda variable_visitor)
1717
set(multi_devices_graph_builder_deps nccl_all_reduce_op_handle)
1818
nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim dynload_cuda)
1919
nv_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
@@ -25,6 +25,7 @@ else()
2525
endif()
2626

2727
cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
28+
cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope)
2829

2930
cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
3031
scale_loss_grad_op_handle rpc_op_handle ${multi_devices_graph_builder_deps} reduce_op_handle broadcast_op_handle)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/framework/details/fuse_vars_op_handle.h"
16+
17+
namespace paddle {
18+
namespace framework {
19+
namespace details {
20+
21+
void FuseVarsOpHandle::RunImpl() {
22+
WaitInputVarGenerated(place_);
23+
24+
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
25+
auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
26+
PADDLE_ENFORCE_EQ(in_var_handles.size(), 0);
27+
PADDLE_ENFORCE_EQ(out_var_handles.size() - 1, inputs_numel_.size(), "");
28+
29+
auto scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
30+
31+
auto out_var_handle = out_var_handles[0];
32+
auto out_var = scope->Var(out_var_handle->name_);
33+
34+
auto out_tensor = out_var->GetMutable<LoDTensor>();
35+
out_tensor->Resize({total_numel_}).mutable_data(this->place_, type_);
36+
37+
int64_t s = 0;
38+
for (size_t i = 1; i < out_var_handles.size(); ++i) {
39+
auto out_name = out_var_handles[i]->name_;
40+
auto out_t = scope->Var(out_name)->GetMutable<LoDTensor>();
41+
auto numel = this->inputs_numel_.at(out_name);
42+
out_t->ShareDataWith(out_tensor->Slice(s, s + numel));
43+
s += numel;
44+
}
45+
this->RunAndRecordEvent([this] {});
46+
}
47+
48+
std::string FuseVarsOpHandle::Name() const { return "fuse vars"; }
49+
} // namespace details
50+
} // namespace framework
51+
} // namespace paddle
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <map>
18+
#include <string>
19+
#include <vector>
20+
21+
#include "paddle/fluid/framework/details/container_cast.h"
22+
#include "paddle/fluid/framework/details/op_handle_base.h"
23+
#include "paddle/fluid/framework/lod_tensor.h"
24+
#include "paddle/fluid/framework/scope.h"
25+
#include "paddle/fluid/platform/device_context.h"
26+
27+
namespace paddle {
28+
namespace framework {
29+
namespace details {
30+
31+
struct FuseVarsOpHandle : public OpHandleBase {
32+
public:
33+
FuseVarsOpHandle(Scope *local_scope, const platform::Place &place,
34+
const std::unordered_map<std::string, int64_t> &inputs_numel,
35+
const std::type_index &var_type)
36+
: local_scope_(local_scope),
37+
place_(place),
38+
inputs_numel_(inputs_numel),
39+
type_(var_type) {
40+
total_numel_ = 0;
41+
for (auto in_numel : inputs_numel) {
42+
PADDLE_ENFORCE_GT(in_numel.second, 0);
43+
total_numel_ += in_numel.second;
44+
}
45+
}
46+
47+
std::string Name() const override;
48+
49+
bool IsMultiDeviceTransfer() override { return false; };
50+
51+
protected:
52+
void RunImpl() override;
53+
54+
private:
55+
Scope *local_scope_;
56+
const platform::Place place_;
57+
const std::unordered_map<std::string, int64_t> inputs_numel_;
58+
const std::type_index type_;
59+
int64_t total_numel_;
60+
};
61+
} // namespace details
62+
} // namespace framework
63+
} // namespace paddle

0 commit comments

Comments
 (0)