
Commit 624caee

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_fetch_op_handle

2 parents: e383ea2 + 9707aa6

24 files changed: +529 -316 lines

cmake/inference_lib.cmake

Lines changed: 6 additions & 0 deletions
@@ -70,6 +70,12 @@ copy(glog_lib
   DSTS ${dst_dir} ${dst_dir}/lib
 )
 
+set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/boost/")
+copy(boost_lib
+  SRCS ${BOOST_INCLUDE_DIR}/boost
+  DSTS ${dst_dir}
+)
+
 if(NOT PROTOBUF_FOUND)
   set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/protobuf")
   copy(protobuf_lib
paddle/fluid/framework/details/build_strategy.h

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+struct BuildStrategy {
+  enum class ReduceStrategy { kAllReduce = 0, kReduce = 1 };
+
+  enum class GradientScaleStrategy {
+    kCoeffNumDevice = 0,
+    kOne = 1,
+    kCustomized = 2,
+  };
+
+  ReduceStrategy reduce_{ReduceStrategy::kAllReduce};
+  GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice};
+};
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
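
The new struct centralizes graph-build options that were previously loose constructor booleans. As a rough illustration of the two knobs (the helper below is hypothetical and not part of this commit):

// Hypothetical helper, shown only to illustrate the new options.
#include "paddle/fluid/framework/details/build_strategy.h"

namespace details = paddle::framework::details;

details::BuildStrategy MakeReduceStrategy() {
  details::BuildStrategy strategy;
  // kReduce places each parameter's reduce op on one device (round-robin)
  // instead of all-reducing every gradient on every device.
  strategy.reduce_ = details::BuildStrategy::ReduceStrategy::kReduce;
  // kCustomized skips the auto-inserted scale-loss-grad op so the user can
  // provide loss@grad themselves; the default derives the coefficient from
  // the device count.
  strategy.gradient_scale_ =
      details::BuildStrategy::GradientScaleStrategy::kCoeffNumDevice;
  return strategy;
}
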
paddle/fluid/framework/details/execution_strategy.h

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+struct ExecutionStrategy {
+  size_t num_threads_{0};
+  bool use_event_{true};
+  bool allow_op_delay_{false};
+};
+
+}  // namespace details
+}  // namespace framework
+}  // namespace paddle
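
A corresponding sketch for the execution-time options; the defaults shown mirror the member initializers above (hypothetical helper, not part of this commit):

#include "paddle/fluid/framework/details/execution_strategy.h"

namespace details = paddle::framework::details;

details::ExecutionStrategy MakeExecutionStrategy() {
  details::ExecutionStrategy strategy;
  strategy.num_threads_ = 4;         // values >= 2 enable the ThreadPool
  strategy.use_event_ = true;        // passed through to each op handle's Run()
  strategy.allow_op_delay_ = false;  // true delays multi-device transfer ops
  return strategy;
}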

paddle/fluid/framework/details/multi_devices_graph_builder.cc

Lines changed: 23 additions & 24 deletions
@@ -37,31 +37,26 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
     const std::vector<Scope *> &local_scopes,
-    platform::NCCLContextMap *nccl_ctxs, bool use_default_grad_scale,
-    bool balance_parameter_opt_between_cards)
+    platform::NCCLContextMap *nccl_ctxs, const BuildStrategy &strategy)
     : loss_var_name_(loss_var_name),
       places_(places),
       local_scopes_(local_scopes),
       nccl_ctxs_(nccl_ctxs),
-      balance_parameter_opt_between_cards_(
-          balance_parameter_opt_between_cards) {
+      strategy_(strategy) {
 #else
 MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
-    const std::vector<Scope *> &local_scopes, bool use_default_grad_scale,
-    bool balance_parameter_opt_between_cards)
+    const std::vector<Scope *> &local_scopes, const BuildStrategy &strategy)
     : loss_var_name_(loss_var_name),
       places_(places),
       local_scopes_(local_scopes),
-      balance_parameter_opt_between_cards_(
-          balance_parameter_opt_between_cards) {
+      strategy_(strategy) {
 #endif
   for (auto &p : params) {
     grad_names_.insert(GradVarName(p));
   }
-  use_default_grad_scale_ = use_default_grad_scale;
 }
 
 void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
@@ -146,7 +141,8 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
       CreateComputationalOps(&result, *op, 1);
     } else if (IsScaleLossOp(*op)) {
       // user can customize loss@grad if not use_default_grad_scale_
-      if (use_default_grad_scale_) {
+      if (strategy_.gradient_scale_ !=
+          BuildStrategy::GradientScaleStrategy::kCustomized) {
         CreateScaleLossGradOp(&result);
       }
       is_forwarding = false;
@@ -165,19 +161,22 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
         // broadcast, and each gradient is only broadcast once.
         for (auto &og : op->OutputArgumentNames()) {
           if (IsParameterGradientOnce(og, &og_has_been_broadcast)) {
-            if (balance_parameter_opt_between_cards_) {
-              CreateReduceOp(&result, og, cur_device_id);
-              var_name_on_devices[cur_device_id].emplace(og);
-              bcast_var_name_set[cur_device_id].emplace(
-                  og.substr(0, og.size() - strlen(kGradVarSuffix)));
-              cur_device_id = (cur_device_id + 1) % places_.size();
-            } else {
-              if (IsSparseGradient(var_types, og)) {
-                CreateReduceOp(&result, og, 0);
-                CreateBroadcastOp(&result, og, 0);
-              } else {
-                InsertNCCLAllReduceOp(&result, og);
-              }
+            switch (strategy_.reduce_) {
+              case BuildStrategy::ReduceStrategy::kReduce:
+                CreateReduceOp(&result, og, cur_device_id);
+                var_name_on_devices[cur_device_id].emplace(og);
+                bcast_var_name_set[cur_device_id].emplace(
+                    og.substr(0, og.size() - strlen(kGradVarSuffix)));
+                cur_device_id = (cur_device_id + 1) % places_.size();
+                break;
+              case BuildStrategy::ReduceStrategy::kAllReduce:
+                if (IsSparseGradient(var_types, og)) {
+                  CreateReduceOp(&result, og, 0);
+                  CreateBroadcastOp(&result, og, 0);
+                } else {
+                  InsertNCCLAllReduceOp(&result, og);
+                }
+                break;
             }
           }
         }
@@ -303,7 +302,7 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
 int MultiDevSSAGraphBuilder::GetOpDeviceID(
     const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
     const OpDesc &op) const {
-  if (!balance_parameter_opt_between_cards_) {
+  if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
     return -1;
   }
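
In the kReduce branch above, reduce ops are spread over devices round-robin via cur_device_id. A minimal standalone model of that placement logic (simplified names, illustrative only):

#include <cstddef>
#include <string>
#include <vector>

// Gradient i is reduced on device i % num_devices, so parameter
// optimization work is balanced across cards (cf. cur_device_id above).
std::vector<size_t> PlaceGradients(const std::vector<std::string> &grad_names,
                                   size_t num_devices) {
  std::vector<size_t> device_of_grad;
  size_t cur_device_id = 0;
  for (size_t i = 0; i < grad_names.size(); ++i) {
    device_of_grad.push_back(cur_device_id);
    cur_device_id = (cur_device_id + 1) % num_devices;
  }
  return device_of_grad;
}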

paddle/fluid/framework/details/multi_devices_graph_builder.h

Lines changed: 6 additions & 6 deletions
@@ -17,6 +17,7 @@
 #include <utility>
 #include <vector>
 
+#include "paddle/fluid/framework/details/build_strategy.h"
 #include "paddle/fluid/framework/details/ssa_graph_builder.h"
 
 namespace paddle {
@@ -36,15 +37,13 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                           const std::unordered_set<std::string> &params,
                           const std::vector<Scope *> &local_scopes,
                           platform::NCCLContextMap *nccl_ctxs,
-                          bool use_default_grad_scale,
-                          bool balance_parameter_opt_between_cards);
+                          const BuildStrategy &strategy);
 #else
   MultiDevSSAGraphBuilder(const std::vector<platform::Place> &places,
                           const std::string &loss_var_name,
                           const std::unordered_set<std::string> &params,
                           const std::vector<Scope *> &local_scopes,
-                          bool use_default_grad_scale,
-                          bool balance_parameter_opt_between_cards);
+                          const BuildStrategy &strategy);
 #endif
 
   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
@@ -62,8 +61,6 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #ifdef PADDLE_WITH_CUDA
   platform::NCCLContextMap *nccl_ctxs_;
 #endif
-  bool balance_parameter_opt_between_cards_;
-  bool use_default_grad_scale_;
 
   bool IsScaleLossOp(const OpDesc &op) const;
 
@@ -105,6 +102,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
   bool IsSparseGradient(
       const std::unordered_map<std::string, proto::VarType::Type> &var_types,
       const std::string &og) const;
+
+ private:
+  BuildStrategy strategy_;
 };
 }  // namespace details
 }  // namespace framework

paddle/fluid/framework/details/threaded_ssa_graph_executor.cc

Lines changed: 8 additions & 9 deletions
@@ -18,18 +18,17 @@ namespace paddle {
 namespace framework {
 namespace details {
 ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
-    size_t num_threads, bool use_event,
-    const std::vector<Scope *> &local_scopes,
+    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
     const std::vector<platform::Place> &places,
-    std::unique_ptr<SSAGraph> &&graph, bool allow_op_delay)
+    std::unique_ptr<SSAGraph> &&graph)
     : SSAGraphExecutor(std::move(graph)),
-      pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr),
+      pool_(strategy.num_threads_ >= 2 ? new ::ThreadPool(strategy.num_threads_)
+                                       : nullptr),
       local_scopes_(local_scopes),
       places_(places),
       fetch_ctxs_(places),
-      use_event_(use_event),
       running_ops_(0),
-      allow_op_delay_(allow_op_delay) {}
+      strategy_(strategy) {}
 
 FeedFetchList ThreadedSSAGraphExecutor::Run(
     const std::vector<std::string> &fetch_tensors) {
@@ -86,7 +85,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
   //
   // NOTE: DelayedOps have a lower priority. It will be scheduled after all
   // ready_ops have been performed.
-  if (ready_ops.empty() && allow_op_delay_ && running_ops_ == 0) {
+  if (ready_ops.empty() && strategy_.allow_op_delay_ && running_ops_ == 0) {
     run_all_ops(delayed_ops);
   } else {
     run_all_ops(ready_ops);
@@ -113,7 +112,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
       auto &deps = pending_ops[op];
       --deps;
       if (deps == 0) {
-        if (op->IsMultiDeviceTransfer() && allow_op_delay_) {
+        if (op->IsMultiDeviceTransfer() && strategy_.allow_op_delay_) {
           delayed_ops.insert(op);
         } else {
           ready_ops.insert(op);
@@ -191,7 +190,7 @@ void ThreadedSSAGraphExecutor::RunOp(
   auto op_run = [ready_var_q, op, this] {
     try {
       VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
-      op->Run(use_event_);
+      op->Run(strategy_.use_event_);
       VLOG(10) << op << " " << op->Name() << " Done ";
       running_ops_--;
       ready_var_q->Extend(op->Outputs());
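
The rewritten constructor keeps the earlier rule that fewer than two threads means no pool at all. A compact sketch of that pool-or-inline pattern, assuming the enqueue API of the third-party ThreadPool used here (illustrative only):

#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include "ThreadPool.h"  // third-party ThreadPool, as in the executor

class Runner {
 public:
  // Mirrors pool_(strategy.num_threads_ >= 2 ? new ::ThreadPool(...) : nullptr).
  explicit Runner(size_t num_threads)
      : pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr) {}

  void Run(std::function<void()> fn) {
    if (pool_) {
      pool_->enqueue(std::move(fn));  // asynchronous, on the pool
    } else {
      fn();  // synchronous fallback in the calling thread
    }
  }

 private:
  std::unique_ptr<::ThreadPool> pool_;
};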

paddle/fluid/framework/details/threaded_ssa_graph_executor.h

Lines changed: 6 additions & 5 deletions
@@ -23,6 +23,7 @@
 #include <functional>
 #include "ThreadPool.h" // ThreadPool in thrird party
 #include "paddle/fluid/framework/blocking_queue.h"
+#include "paddle/fluid/framework/details/execution_strategy.h"
 #include "paddle/fluid/framework/details/fetch_op_handle.h"
 #include "paddle/fluid/framework/details/ssa_graph_executor.h"
 
@@ -34,11 +35,10 @@ namespace details {
 
 class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  public:
-  ThreadedSSAGraphExecutor(size_t num_threads, bool use_event,
+  ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
                            const std::vector<Scope *> &local_scopes,
                            const std::vector<platform::Place> &places,
-                           std::unique_ptr<SSAGraph> &&graph,
-                           bool allow_op_delay);
+                           std::unique_ptr<SSAGraph> &&graph);
 
   // Run a SSAGraph by a thread pool
   // Use topological sort algorithm
@@ -55,10 +55,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
   platform::DeviceContextPool fetch_ctxs_;
-  const bool use_event_;
   std::unique_ptr<platform::EnforceNotMet> exception_;
   std::atomic<int> running_ops_;
-  bool allow_op_delay_;
 
   void InsertPendingOp(std::unordered_map<OpHandleBase *, size_t> *pending_ops,
                        OpHandleBase *op_instance) const;
@@ -74,6 +72,9 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
       std::unordered_map<OpHandleBase *, size_t> *pending_ops,
       std::unordered_set<VarHandleBase *> *pending_vars,
       BlockingQueue<VarHandleBase *> *ready_vars, FeedFetchList *fetch_data);
+
+ private:
+  ExecutionStrategy strategy_;
 };
 
 }  // namespace details

paddle/fluid/framework/parallel_executor.cc

Lines changed: 7 additions & 10 deletions
@@ -52,13 +52,12 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
 }
 
 ParallelExecutor::ParallelExecutor(
-    size_t num_threads, bool use_event,
     const std::vector<platform::Place> &places,
     const std::unordered_set<std::string> &params,
     const std::unordered_set<std::string> &bcast_vars,
    const ProgramDesc &main_program, const std::string &loss_var_name,
-    Scope *scope, const std::vector<Scope *> &local_scopes, bool allow_op_delay,
-    bool use_default_grad_scale, bool balance_parameter_opt_between_cards,
+    Scope *scope, const std::vector<Scope *> &local_scopes,
+    const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
     size_t num_trainers, size_t trainer_id)
     : member_(new ParallelExecutorPrivate(places)) {
   member_->global_scope_ = scope;
@@ -100,18 +99,16 @@ ParallelExecutor::ParallelExecutor(
 #ifdef PADDLE_WITH_CUDA
   details::MultiDevSSAGraphBuilder builder(
       member_->places_, loss_var_name, params, member_->local_scopes_,
-      member_->nccl_ctxs_.get(), use_default_grad_scale,
-      balance_parameter_opt_between_cards);
+      member_->nccl_ctxs_.get(), build_strategy);
 #else
-  details::MultiDevSSAGraphBuilder builder(
-      member_->places_, loss_var_name, params, member_->local_scopes_,
-      use_default_grad_scale, balance_parameter_opt_between_cards);
+  details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name,
+                                           params, member_->local_scopes_,
+                                           build_strategy);
 #endif
   auto graph = builder.Build(main_program);
 
   member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
-      num_threads, use_event, member_->local_scopes_, places, std::move(graph),
-      allow_op_delay));
+      exec_strategy, member_->local_scopes_, places, std::move(graph)));
 
   // Step 3. Create vars in each scope;
   for (auto *var : main_program.Block(0).AllVars()) {
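
After this change a call site passes the two strategy structs instead of five loose flags. A hedged sketch of such a call site (all variables are assumed to exist in the surrounding code; not code from this commit):

// Hypothetical call site; places, params, bcast_vars, main_program,
// loss_var_name, scope, and local_scopes are assumed to be defined.
details::ExecutionStrategy exec_strategy;
exec_strategy.num_threads_ = 4;

details::BuildStrategy build_strategy;
build_strategy.reduce_ = details::BuildStrategy::ReduceStrategy::kAllReduce;

ParallelExecutor pe(places, params, bcast_vars, main_program, loss_var_name,
                    scope, local_scopes, exec_strategy, build_strategy,
                    /*num_trainers=*/1, /*trainer_id=*/0);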
