Skip to content

Commit 64305b3

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/polish_visit_data_type
2 parents 9f705a4 + 8c7d2e2 commit 64305b3

File tree

236 files changed

+1029
-993
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

236 files changed

+1029
-993
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
6262
## Installation
6363

6464
It is recommended to check out the
65-
[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/docker_install_en.html)
65+
[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/docker_install_en.html)
6666
before looking into the
67-
[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/build_from_source_en.html).
67+
[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/build_from_source_en.html).
6868

6969
## Documentation
7070

cmake/external/mkldnn.cmake

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
4545
ELSE()
4646
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
4747
ENDIF()
48-
49-
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} -Wno-error=strict-overflow")
50-
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} -Wno-error=strict-overflow")
48+
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
49+
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
50+
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
5151
ExternalProject_Add(
5252
${MKLDNN_PROJECT}
5353
${EXTERNAL_PROJECT_LOG_ARGS}
@@ -61,6 +61,7 @@ ExternalProject_Add(
6161
CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
6262
CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
6363
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
64+
CMAKE_ARGS -DWITH_TEST=OFF -DWITH_EXAMPLE=OFF
6465
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
6566
-DMKLROOT:PATH=${MKLML_ROOT}
6667
)

cmake/external/mklml.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ ENDIF()
2727
INCLUDE(ExternalProject)
2828

2929
SET(MKLML_PROJECT "extern_mklml")
30-
SET(MKLML_VER "mklml_lnx_2018.0.1.20171007")
30+
SET(MKLML_VER "mklml_lnx_2018.0.3.20180406")
3131
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz")
3232
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
3333
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")

doc/fluid/design/concepts/lod_tensor.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ into offsets
155155
3 2+3 4+5 1+9 2+10 3+12
156156
```
157157

158-
so we know that the first sentence is from word 0 to word 3, and the second sentence from work 3 to word 5.
158+
so we know that the first sentence is from word 0 to word 3, and the second sentence from word 3 to word 5.
159159

160160
Similarly, the lengths in the top level LoD
161161

paddle/fluid/framework/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor
5757
cc_library(attribute SRCS attribute.cc DEPS framework_proto boost)
5858
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
5959
device_context)
60-
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
60+
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute glog)
6161
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
6262
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
6363
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)

paddle/fluid/framework/data_device_transform_test.cu

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ struct AddFunctor {
3232

3333
class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
3434
public:
35-
OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
36-
: OpProtoAndCheckerMaker(proto, op_checker) {
35+
void Make() {
3736
AddInput("input", "input1 of test op");
3837
AddOutput("output", "output of test op");
3938
AddAttr<bool>("use_gpu", "force to use gpu kernel").SetDefault(false);

paddle/fluid/framework/details/computation_op_handle.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ struct ComputationOpHandle : public OpHandleBase {
3636
protected:
3737
void RunImpl() override;
3838

39-
virtual bool NeedWait(VarHandleBase *in_var);
39+
bool NeedWait(VarHandleBase *in_var) override;
4040

4141
private:
4242
std::unique_ptr<OperatorBase> op_;

paddle/fluid/framework/details/fetch_op_handle.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct FetchOpHandle : public OpHandleBase {
4242
protected:
4343
void RunImpl() override;
4444

45-
virtual void WaitInputVarGenerated(const platform::Place &place);
45+
void WaitInputVarGenerated(const platform::Place &place) override;
4646

4747
private:
4848
FeedFetchList *data_;

paddle/fluid/framework/details/multi_devices_graph_builder.cc

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,26 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
3737
const std::string &loss_var_name,
3838
const std::unordered_set<std::string> &params,
3939
const std::vector<Scope *> &local_scopes,
40-
platform::NCCLContextMap *nccl_ctxs, bool use_default_grad_scale)
40+
platform::NCCLContextMap *nccl_ctxs, bool use_default_grad_scale,
41+
bool balance_parameter_opt_between_cards)
4142
: loss_var_name_(loss_var_name),
4243
places_(places),
4344
local_scopes_(local_scopes),
44-
nccl_ctxs_(nccl_ctxs) {
45+
nccl_ctxs_(nccl_ctxs),
46+
balance_parameter_opt_between_cards_(
47+
balance_parameter_opt_between_cards) {
4548
#else
4649
MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
4750
const std::vector<platform::Place> &places,
4851
const std::string &loss_var_name,
4952
const std::unordered_set<std::string> &params,
50-
const std::vector<Scope *> &local_scopes, bool use_default_grad_scale)
53+
const std::vector<Scope *> &local_scopes, bool use_default_grad_scale,
54+
bool balance_parameter_opt_between_cards)
5155
: loss_var_name_(loss_var_name),
5256
places_(places),
53-
local_scopes_(local_scopes) {
57+
local_scopes_(local_scopes),
58+
balance_parameter_opt_between_cards_(
59+
balance_parameter_opt_between_cards) {
5460
#endif
5561
for (auto &p : params) {
5662
grad_names_.insert(GradVarName(p));
@@ -124,6 +130,12 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
124130
// Find "send" op first for split is in front of send.
125131
OpDesc *send_op = GetSendOpDesc(program);
126132

133+
size_t cur_device_id = 0;
134+
std::vector<std::unordered_set<std::string>> var_name_on_devices;
135+
std::vector<std::unordered_set<std::string>> bcast_var_name_set;
136+
var_name_on_devices.resize(places_.size());
137+
bcast_var_name_set.resize(places_.size());
138+
127139
bool is_forwarding = true;
128140
for (auto *op : program.Block(0).AllOps()) {
129141
if (op->Type() == "send") {
@@ -139,24 +151,47 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
139151
}
140152
is_forwarding = false;
141153
} else {
142-
CreateComputationalOps(&result, *op, places_.size());
154+
int op_dev_id = GetOpDeviceID(var_name_on_devices, *op);
155+
if (op_dev_id == -1) { // var on all device
156+
CreateComputationalOps(&result, *op, places_.size());
157+
} else {
158+
CreateComputationalOp(&result, *op, op_dev_id);
159+
for (auto &var_name : op->OutputArgumentNames()) {
160+
var_name_on_devices[op_dev_id].emplace(var_name);
161+
}
162+
}
143163
if (!is_forwarding && places_.size() > 1) {
144164
// Currently, we assume that once gradient is generated, it can be
145165
// broadcast, and each gradient is only broadcast once.
146166
for (auto &og : op->OutputArgumentNames()) {
147167
if (IsParameterGradientOnce(og, &og_has_been_broadcast)) {
148-
if (IsSparseGradient(var_types, og)) {
149-
CreateReduceOp(&result, og, 0);
150-
CreateBroadcastOp(&result, og, 0);
168+
if (balance_parameter_opt_between_cards_) {
169+
CreateReduceOp(&result, og, cur_device_id);
170+
var_name_on_devices[cur_device_id].emplace(og);
171+
bcast_var_name_set[cur_device_id].emplace(
172+
og.substr(0, og.size() - strlen(kGradVarSuffix)));
173+
cur_device_id = (cur_device_id + 1) % places_.size();
151174
} else {
152-
InsertNCCLAllReduceOp(&result, og);
175+
if (IsSparseGradient(var_types, og)) {
176+
CreateReduceOp(&result, og, 0);
177+
CreateBroadcastOp(&result, og, 0);
178+
} else {
179+
InsertNCCLAllReduceOp(&result, og);
180+
}
153181
}
154182
}
155183
}
156184
}
157185
}
158186
}
159187

188+
// Insert BCast Ops
189+
for (size_t dev_id = 0; dev_id < bcast_var_name_set.size(); ++dev_id) {
190+
auto &to_bcast_set = bcast_var_name_set[dev_id];
191+
for (auto &bcast_name : to_bcast_set) {
192+
CreateBroadcastOp(&result, bcast_name, dev_id);
193+
}
194+
}
160195
/*
161196
Dependency graph has been constructed. However, there are still data
162197
hazards that need to be handled.
@@ -265,6 +300,26 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
265300
return is_pg_once;
266301
}
267302

303+
int MultiDevSSAGraphBuilder::GetOpDeviceID(
304+
const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
305+
const OpDesc &op) const {
306+
if (!balance_parameter_opt_between_cards_) {
307+
return -1;
308+
}
309+
310+
int var_dev_id = -1;
311+
for (auto &var_name : op.InputArgumentNames()) {
312+
if (var_dev_id != -1) break;
313+
for (size_t i = 0; i < var_name_on_devices.size(); ++i) {
314+
if (var_name_on_devices[i].count(var_name)) {
315+
var_dev_id = static_cast<int>(i);
316+
break;
317+
}
318+
}
319+
}
320+
return var_dev_id;
321+
}
322+
268323
void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(SSAGraph *result) const {
269324
for (size_t i = 0; i < places_.size(); ++i) {
270325
// Insert ScaleCost OpHandle

paddle/fluid/framework/details/multi_devices_graph_builder.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,15 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
3636
const std::unordered_set<std::string> &params,
3737
const std::vector<Scope *> &local_scopes,
3838
platform::NCCLContextMap *nccl_ctxs,
39-
bool use_default_grad_scale);
39+
bool use_default_grad_scale,
40+
bool balance_parameter_opt_between_cards);
4041
#else
4142
MultiDevSSAGraphBuilder(const std::vector<platform::Place> &places,
4243
const std::string &loss_var_name,
4344
const std::unordered_set<std::string> &params,
4445
const std::vector<Scope *> &local_scopes,
45-
bool use_default_grad_scale);
46+
bool use_default_grad_scale,
47+
bool balance_parameter_opt_between_cards);
4648
#endif
4749

4850
std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
@@ -60,6 +62,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
6062
#ifdef PADDLE_WITH_CUDA
6163
platform::NCCLContextMap *nccl_ctxs_;
6264
#endif
65+
bool balance_parameter_opt_between_cards_;
6366
bool use_default_grad_scale_;
6467

6568
bool IsScaleLossOp(const OpDesc &op) const;
@@ -84,6 +87,10 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
8487
const std::string &og,
8588
std::unordered_set<std::string> *og_has_been_broadcast) const;
8689

90+
int GetOpDeviceID(
91+
const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
92+
const OpDesc &op) const;
93+
8794
void InsertNCCLAllReduceOp(SSAGraph *result, const std::string &og) const;
8895

8996
void CreateBroadcastOp(SSAGraph *result, const std::string &p_name,

0 commit comments

Comments
 (0)