Skip to content

Commit 79d555b

Browse files
committed
Merge branch 'develop' into mkldnn
2 parents c6d230e + 59e1092 commit 79d555b

30 files changed

+630
-150
lines changed

doc/v2/build_and_install/build_from_source_cn.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
106106

107107
- 学习 Docker 有多难?
108108

109-
理解 Docker 并不难,大概花十分钟看一下 `这篇文章 <https://zhuanlan.zhihu.com/p/19902938>`_ 。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。
109+
理解 Docker 并不难,大概花十分钟看一下 `如何使用Docker <https://zhuanlan.zhihu.com/p/19902938>`_ 。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。
110110

111111
- 我可以用 IDE 吗?
112112

@@ -123,19 +123,19 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
123123

124124
- 可以并行编译吗?
125125

126-
是的。我们的 Docker image 运行一个 `Bash脚本 <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh>`_ 。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。
126+
是的。我们的 Docker image 运行一个 `Paddle编译Bash脚本 <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh>`_ 。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。
127127

128128
- Docker 需要 sudo
129129

130130
如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。
131131

132132
- 在 Windows/MacOS 上编译很慢
133133

134-
Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考 `这个issue <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 。
134+
Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考 `如何为Windows/Mac计算机上的Docker增加内存和虚拟机 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 。
135135

136136
- 磁盘不够
137137

138-
本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考 `这篇文章 <https://zaiste.net/posts/removing_docker_containers/>`_ 来清理这些内容。
138+
本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考 `如何删除Docker Container <https://zaiste.net/posts/removing_docker_containers/>`_ 来清理这些内容。
139139

140140

141141
.. _compile_deps:
@@ -195,7 +195,7 @@ BLAS
195195

196196
PaddlePaddle支持 `MKL <https://software.intel.com/en-us/intel-mkl>`_ 和
197197
`OpenBlAS <http://www.openblas.net/>`_ 两种BLAS库。默认使用MKL。如果使用MKL并且机器含有AVX2指令集,
198-
还会下载MKL-DNN数学库,详细参考 `这里 <https://github.com/PaddlePaddle/Paddle/tree/develop/doc/design/mkldnn#cmake>`_ 。
198+
还会下载MKL-DNN数学库,详细参考 `mkldnn设计文档 <https://github.com/PaddlePaddle/Paddle/tree/develop/doc/design/mkldnn#cmake>`_ 。
199199

200200
如果关闭MKL,则会使用OpenBLAS作为BLAS库。
201201

paddle/fluid/framework/data_type.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ struct DataTypeMap {
2828
};
2929

3030
static DataTypeMap* InitDataTypeMap();
31+
// C++11 removes the need for manual locking. Concurrent execution shall wait if
32+
// a static local variable is already being initialized.
33+
// https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex
3134
static DataTypeMap& gDataTypeMap() {
3235
static DataTypeMap* g_data_type_map_ = InitDataTypeMap();
3336
return *g_data_type_map_;

paddle/fluid/framework/details/fuse_vars_op_handle.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void FuseVarsOpHandle::RunImpl() {
4242
out_t->ShareDataWith(out_tensor->Slice(s, s + numel));
4343
s += numel;
4444
}
45-
this->RunAndRecordEvent([this] {});
45+
this->RunAndRecordEvent([] {});
4646
}
4747

4848
std::string FuseVarsOpHandle::Name() const { return "fuse vars"; }

paddle/fluid/inference/tensorrt/convert/ut_helper.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ class TRTConvertValidation {
151151
// Compare two output
152152
ASSERT_FALSE(fluid_out.empty());
153153
for (size_t i = 0; i < fluid_out.size(); i++) {
154-
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 1e-6);
154+
// Loosen the threshold for CI in different machine model.
155+
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5);
155156
}
156157
}
157158
}

paddle/fluid/operators/activation_op.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ namespace operators {
2424
: public ::paddle::framework::OpProtoAndCheckerMaker { \
2525
public: \
2626
void Make() override { \
27-
AddInput("X", "Input of " #OP_NAME "operator"); \
28-
AddOutput("Out", "Output of" #OP_NAME "operator"); \
27+
AddInput("X", "Input of " #OP_NAME " operator"); \
28+
AddOutput("Out", "Output of " #OP_NAME " operator"); \
2929
AddAttr<bool>("use_mkldnn", \
3030
"(bool, default false) Only used in mkldnn kernel") \
3131
.SetDefault(false); \
32-
AddComment(#OP_COMMENT); \
32+
AddComment(OP_COMMENT); \
3333
} \
3434
}
3535

paddle/fluid/operators/crop_op.cc

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ class CropOp : public framework::OperatorWithKernel {
4848
ctx->SetOutputDim("Out", y_dim);
4949
}
5050
}
51+
52+
framework::OpKernelType GetExpectedKernelType(
53+
const framework::ExecutionContext& ctx) const override {
54+
return framework::OpKernelType(
55+
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
56+
ctx.device_context());
57+
}
5158
};
5259

5360
class CropOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -60,13 +67,19 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
6067
"The input used as reference for cropping, "
6168
"which is of the same dimensions as X.")
6269
.AsDispensable();
70+
AddInput("Offsets",
71+
"The input used to describe offsets in runtime, which is a "
72+
"1-D vector whose size equals to the rank of input 'X'. The "
73+
"elements data type must be int.")
74+
.AsDispensable();
6375
AddOutput("Out",
6476
"The output of crop op, "
6577
"which is of the same dimensions as X.");
6678
AddAttr<std::vector<int>>("offsets",
6779
"A list<int> describing offsets to be cropped. "
6880
"The size of offsets list should be the same as "
69-
"the dimension size of input X.");
81+
"the dimension size of input X.")
82+
.SetDefault(std::vector<int>());
7083
AddAttr<std::vector<int>>("shape",
7184
"A list<int> describing the shape of output. "
7285
"The size of shape list should be the same as "
@@ -77,6 +90,17 @@ Crop Operator.
7790
7891
Crop input into output, as specified by offsets and shape.
7992
93+
There are two ways to set the offsets:
94+
1. In runtime: Using the input 'Offsets', which is a Variable and can be
95+
output of other operators. This way is suitable for
96+
dynamic offsets.
97+
2. In network configuration: Using the attribute 'offsets', which will be
98+
set in Python configure script. This way is
99+
suitable for fixed offsets.
100+
You CANNOT use these two ways at the same time. An exception will be raised
101+
if input 'Offsets' is configured and meanwhile the attribute 'offsets' is
102+
not empty.
103+
80104
There are two ways to set shape:
81105
1. reference input: crop input X into the same shape as reference input.
82106
The dimension of reference input should
@@ -146,6 +170,15 @@ class CropOpGrad : public framework::OperatorWithKernel {
146170
ctx->SetOutputDim(x_grad_name, x_dims);
147171
}
148172
}
173+
174+
framework::OpKernelType GetExpectedKernelType(
175+
const framework::ExecutionContext& ctx) const override {
176+
return framework::OpKernelType(
177+
framework::ToDataType(
178+
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))
179+
->type()),
180+
ctx.device_context());
181+
}
149182
};
150183

151184
} // namespace operators

paddle/fluid/operators/crop_op.h

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,37 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
2727
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
2828
using framework::Tensor;
2929

30+
static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) {
31+
std::vector<int> res;
32+
int rank = ctx.Input<Tensor>("X")->dims().size();
33+
if (ctx.HasInput("Offsets")) {
34+
PADDLE_ENFORCE(ctx.Attr<std::vector<int>>("offsets").empty(),
35+
"Input 'Offsets' and attribute 'offsets' should not be used "
36+
"at the same time.");
37+
const auto* offsets_tensor = ctx.Input<Tensor>("Offsets");
38+
PADDLE_ENFORCE_EQ(offsets_tensor->dims().size(), 1);
39+
PADDLE_ENFORCE_EQ(
40+
rank, offsets_tensor->dims()[0],
41+
"Offsets size should be equal to dimension size of input tensor.");
42+
const int* offsets_data;
43+
framework::Tensor cpu_tmp_tensor;
44+
if (platform::is_cpu_place(offsets_tensor->place())) {
45+
offsets_data = offsets_tensor->data<int>();
46+
} else {
47+
framework::TensorCopySync(*offsets_tensor, platform::CPUPlace(),
48+
&cpu_tmp_tensor);
49+
offsets_data = cpu_tmp_tensor.data<int>();
50+
}
51+
res = std::vector<int>(offsets_data, offsets_data + rank);
52+
} else {
53+
res = ctx.Attr<std::vector<int>>("offsets");
54+
PADDLE_ENFORCE_EQ(
55+
rank, res.size(),
56+
"Offsets size should be equal to dimension size of input tensor.");
57+
}
58+
return res;
59+
}
60+
3061
template <typename T>
3162
class CropKernel : public framework::OpKernel<T> {
3263
public:
@@ -37,10 +68,7 @@ class CropKernel : public framework::OpKernel<T> {
3768
T* out_data = out->mutable_data<T>(context.GetPlace());
3869
auto x_stride = framework::stride(x->dims());
3970
auto out_stride = framework::stride(out->dims());
40-
auto offsets = context.Attr<std::vector<int>>("offsets");
41-
PADDLE_ENFORCE_EQ(
42-
x->dims().size(), static_cast<int64_t>(offsets.size()),
43-
"Offsets size should be equal to dimension size of input tensor.");
71+
auto offsets = GetOffsets(context);
4472
int64_t offset = 0;
4573
for (size_t i = 0; i < offsets.size(); ++i) {
4674
offset += (x_stride[i] * offsets[i]);
@@ -56,7 +84,7 @@ void CropGradFunction(const framework::ExecutionContext& context) {
5684
if (d_x != nullptr) {
5785
auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
5886
d_x->mutable_data<T>(context.GetPlace());
59-
auto offsets = context.Attr<std::vector<int>>("offsets");
87+
auto offsets = GetOffsets(context);
6088
Eigen::array<std::pair<int, int>, D> paddings;
6189
for (size_t i = 0; i < D; ++i) {
6290
paddings[i].first = offsets[i];

paddle/fluid/operators/detail/request_handler.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ class RequestHandler {
8080
}
8181
framework::ProgramDesc* program() { return program_; }
8282
framework::Executor* executor() { return executor_; }
83-
std::vector<framework::Variable*>& sparse_vars() { return sparse_vars_; }
8483

8584
// This function processes user's rpc request.
8685
// The implementation is in request_handler_impl.
@@ -113,13 +112,7 @@ class RequestHandler {
113112
std::unordered_map<std::string,
114113
std::shared_ptr<framework::ExecutorPrepareContext>>*
115114
grad_to_prepared_ctx_;
116-
117-
// Record received sparse variables, so that
118-
// we could reset those after execute optimize program
119-
std::vector<framework::Variable*> sparse_vars_;
120115
RPCServer* rpc_server_;
121-
122-
std::mutex sparse_var_mutex_;
123116
};
124117

125118
} // namespace detail

paddle/fluid/operators/detail/request_handler_impl.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,22 @@ bool RequestSendHandler::Handle(const std::string& varname,
6363
PADDLE_THROW("sync: Can not find server side var");
6464
return false;
6565
}
66-
6766
if (invar->IsType<framework::SelectedRows>()) {
68-
std::unique_lock<std::mutex> lock(sparse_var_mutex_);
67+
std::unique_lock<std::mutex> lock(mutex_sparse_vars_);
6968
sparse_vars_.push_back(invar);
7069
}
7170
}
72-
7371
return true;
7472
}
7573

74+
void RequestSendHandler::ResetSparseVarRecorder() {
75+
std::unique_lock<std::mutex> lock(mutex_sparse_vars_);
76+
for (auto* var : sparse_vars_) {
77+
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
78+
}
79+
sparse_vars_.clear();
80+
}
81+
7682
bool RequestGetHandler::Handle(const std::string& varname,
7783
framework::Scope* scope,
7884
framework::Variable* invar,

paddle/fluid/operators/detail/request_handler_impl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ class RequestSendHandler final : public RequestHandler {
4141
virtual ~RequestSendHandler() {}
4242
bool Handle(const std::string& varname, framework::Scope* scope,
4343
framework::Variable* var, framework::Variable** outvar) override;
44+
void ResetSparseVarRecorder();
45+
46+
private:
47+
std::mutex mutex_sparse_vars_;
48+
std::vector<framework::Variable*> sparse_vars_;
4449
};
4550

4651
class RequestGetHandler final : public RequestHandler {

0 commit comments

Comments
 (0)