Commit 9a8be9d

Merge pull request #10223 from pkuyym/fix-10219
Change `customize_loss_grad` to `use_default_grad_scale`.
2 parents: 46c90ea + 13fac42

File tree

6 files changed: +23 additions, −18 deletions


paddle/fluid/framework/details/multi_devices_graph_builder.cc

Lines changed: 5 additions & 5 deletions
@@ -34,7 +34,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
-    const std::vector<Scope *> &local_scopes, bool skip_scale_loss,
+    const std::vector<Scope *> &local_scopes, bool use_default_grad_scale,
     platform::NCCLContextMap *nccl_ctxs)
     : loss_var_name_(loss_var_name),
       places_(places),
@@ -45,15 +45,15 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
-    const std::vector<Scope *> &local_scopes, bool skip_scale_loss)
+    const std::vector<Scope *> &local_scopes, bool use_default_grad_scale)
     : loss_var_name_(loss_var_name),
       places_(places),
       local_scopes_(local_scopes) {
 #endif
   for (auto &p : params) {
     grad_names_.insert(GradVarName(p));
   }
-  skip_scale_loss_ = skip_scale_loss;
+  use_default_grad_scale_ = use_default_grad_scale;
 }

 void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
@@ -126,8 +126,8 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
     } else if (IsDistTrainOp(*op, send_op)) {
       CreateComputationalOps(&result, *op, 1);
     } else if (IsScaleLossOp(*op)) {
-      // user can customize loss@grad if skip_scale_loss_
-      if (!skip_scale_loss_) {
+      // user can customize loss@grad if not use_default_grad_scale_
+      if (use_default_grad_scale_) {
        CreateScaleLossGradOp(&result);
      }
      is_forwarding = false;
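
Note that the rename also flips the polarity of the check: the scale op was previously created when skip_scale_loss_ was false, and is now created when use_default_grad_scale_ is true, so default behavior is unchanged. A minimal Python sketch of what the flag controls (illustrative only; aggregate_gradients and its arguments are assumptions, not PaddlePaddle APIs):

# Illustrative sketch: models what the built-in scale-loss-grad op does.
# With use_default_grad_scale=True, each device's loss gradient is seeded
# with 1/device_count, so summing per-device gradients yields their average.
def aggregate_gradients(per_device_grads, use_default_grad_scale=True,
                        custom_scale=None):
    device_count = len(per_device_grads)
    if use_default_grad_scale:
        scale = 1.0 / device_count  # what CreateScaleLossGradOp contributes
    else:
        # With the flag off, the framework skips the scale op and the user
        # must supply a scale (e.g. by feeding loss@GRAD) themselves.
        scale = custom_scale
    return sum(scale * g for g in per_device_grads)

# Example: averaging gradients 2.0 and 4.0 from two devices gives 3.0.
assert aggregate_gradients([2.0, 4.0]) == 3.0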

paddle/fluid/framework/details/multi_devices_graph_builder.h

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                           const std::string &loss_var_name,
                           const std::unordered_set<std::string> &params,
                           const std::vector<Scope *> &local_scopes,
-                          bool skip_scale_loss);
+                          bool use_default_grad_scale);
 #endif

   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
@@ -59,7 +59,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #ifdef PADDLE_WITH_CUDA
   platform::NCCLContextMap *nccl_ctxs_;
 #endif
-  bool skip_scale_loss_;
+  bool use_default_grad_scale_;

   bool IsScaleLossOp(const OpDesc &op) const;

paddle/fluid/framework/parallel_executor.cc

Lines changed: 3 additions & 3 deletions
@@ -58,7 +58,7 @@ ParallelExecutor::ParallelExecutor(
     const std::unordered_set<std::string> &bcast_vars,
     const ProgramDesc &main_program, const std::string &loss_var_name,
     Scope *scope, const std::vector<Scope *> &local_scopes, bool allow_op_delay,
-    bool customize_scale_loss)
+    bool use_default_grad_scale)
     : member_(new ParallelExecutorPrivate(places)) {
   member_->global_scope_ = scope;

@@ -93,11 +93,11 @@ ParallelExecutor::ParallelExecutor(
 #ifdef PADDLE_WITH_CUDA
   details::MultiDevSSAGraphBuilder builder(
       member_->places_, loss_var_name, params, member_->local_scopes_,
-      customize_scale_loss, member_->nccl_ctxs_.get());
+      use_default_grad_scale, member_->nccl_ctxs_.get());
 #else
   details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name,
                                            params, member_->local_scopes_,
-                                           customize_scale_loss);
+                                           use_default_grad_scale);
 #endif
   auto graph = builder.Build(main_program);

paddle/fluid/framework/parallel_executor.h

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ class ParallelExecutor {
                    const ProgramDesc& main_program,
                    const std::string& loss_var_name, Scope* scope,
                    const std::vector<Scope*>& local_scopes,
-                   bool allow_op_delay, bool customize_scale_loss);
+                   bool allow_op_delay, bool use_default_grad_scale);

   ~ParallelExecutor();

paddle/fluid/pybind/pybind.cc

Lines changed: 5 additions & 5 deletions
@@ -502,11 +502,11 @@ All parameter, weight, gradient are variables in Paddle.
            const std::unordered_set<std::string> &bcast_vars,
            const ProgramDesc &main_program, const std::string &loss_var_name,
            Scope *scope, std::vector<Scope *> &local_scopes,
-           bool allow_op_delay, bool customize_loss_grad) {
-          new (&self) ParallelExecutor(num_threads, use_event, places,
-                                       params, bcast_vars, main_program,
-                                       loss_var_name, scope, local_scopes,
-                                       allow_op_delay, customize_loss_grad);
+           bool allow_op_delay, bool use_default_grad_scale) {
+          new (&self) ParallelExecutor(
+              num_threads, use_event, places, params, bcast_vars,
+              main_program, loss_var_name, scope, local_scopes,
+              allow_op_delay, use_default_grad_scale);
         })
       .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs)
       // NOTE: even we return a vec<Scope*>* to Python use reference policy.

python/paddle/fluid/parallel_executor.py

Lines changed: 7 additions & 2 deletions
@@ -30,7 +30,7 @@ def __init__(self,
                  num_threads=None,
                  allow_op_delay=False,
                  share_vars_from=None,
-                 customize_loss_grad=False):
+                 use_default_grad_scale=True):
         """
         ParallelExecutor can run program in parallel.

@@ -46,6 +46,11 @@ def __init__(self,
                 improve performance in some cases, defalut False.
             share_vars_from(ParallelExecutor, default None): If provied,
                 it will share variables from the specified ParallelExecutor.
+            use_default_grad_scale(bool, default True): If set True, a default
+                scale value equal to `1./device_count` would be multiplied to
+                gradients of each device and scaled gradients would be
+                aggregated. Otherwise, a customized scale value should be fed
+                to the network.

         Returns:
             A ParallelExecutor object.
@@ -124,7 +129,7 @@ def __init__(self,
             scope,
             local_scopes,
             allow_op_delay,
-            customize_loss_grad)
+            use_default_grad_scale)
         self.scope = scope

     def run(self, fetch_list, feed=None, feed_dict=None):
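
For reference, a hedged usage sketch of the renamed Python argument (the use_cuda and loss_name parameters and the avg_loss variable are assumptions about the surrounding Fluid API of this era, not shown in this diff):

import paddle.fluid as fluid

# Assumes a program has already been built and `avg_loss` is its loss variable.

# Default: the built-in scale op multiplies each device's loss gradient by
# 1./device_count before gradients are aggregated across devices.
pe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_loss.name)

# Custom scaling: skip the built-in scale op; a customized scale value
# (e.g. a hand-fed loss@GRAD) should be supplied to the network instead.
pe_custom = fluid.ParallelExecutor(
    use_cuda=True, loss_name=avg_loss.name, use_default_grad_scale=False)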
