
Commit 7194ab7

Fix grad dtype diff (PaddlePaddle#75840)
* Fix grad dtype diff
* Fix SetBuffers
* fix tests
* revert
* add check in SetBuffers, remove codes
* Fix GeneralGrad
* refine
* fix tests error
* Update vlog, fix amp using update_grad_in_meta
* refine
* fix dense_tensor is nullptr
* add GradNodeAccumulation check
* update vlog
* revert and add test
* fix eager_manual
* fix test
* fix static
1 parent 3816130 commit 7194ab7

33 files changed: 584 additions and 120 deletions

paddle/fluid/eager/accumulation/accumulation_node.h

Lines changed: 10 additions & 6 deletions
@@ -17,6 +17,7 @@
 #include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/fluid/eager/grad_node_info.h"
 #include "paddle/fluid/eager/hooks.h"
+#include "paddle/fluid/eager/utils.h"
 #include "paddle/utils/test_macros.h"

 COMMON_DECLARE_int32(call_stack_level);
@@ -26,21 +27,24 @@ namespace egr {
 class TEST_API GradNodeAccumulation : public GradNodeBase {
  public:
   // Constructor: configure fwd input tensors to grad node
-  explicit GradNodeAccumulation(AutogradMeta* meta) : GradNodeBase(1, 1) {
-    VLOG(5) << "Construct GradNodeAccumulation";
+  explicit GradNodeAccumulation(const paddle::Tensor& fwd_tensor)
+      : GradNodeBase(1, 1) {
+    VLOG(5) << "Construct GradNodeAccumulation(" << this << ")";
+    auto* meta = egr::EagerUtils::nullable_autograd_meta(fwd_tensor);
     if (meta) {
       weak_grad_ = meta->WeakGrad();
     }
-
     if (FLAGS_call_stack_level == 3) {
       this->SetForwardTrace(egr::Controller::Instance().GetPythonStack());
     }
-
     SetDefaultGradInOutMeta();
+    SetGradInMeta(fwd_tensor, 0);
   }

+  GradNodeAccumulation(const GradNodeAccumulation& other) = default;
+
   ~GradNodeAccumulation() override {
-    VLOG(5) << "Destruct GradNodeAccumulation";
+    VLOG(5) << "Destruct GradNodeAccumulation(" << this << ")";
   }

   // Functor: perform backward computations
@@ -68,7 +72,7 @@ class TEST_API GradNodeAccumulation : public GradNodeBase {

   std::shared_ptr<GradNodeBase> Copy() const override {
     return std::shared_ptr<GradNodeAccumulation>(
-        new GradNodeAccumulation(nullptr));
+        new GradNodeAccumulation(*this));
   }

   void SetFakeEmpty(bool is_fake_empty) { is_fake_empty_ = is_fake_empty; }
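
Usage note: with this change, call sites no longer pass an AutogradMeta* and then set the grad-in meta by hand; they hand the forward tensor to the constructor, which records weak_grad_ and the grad-in TensorMeta (including dtype) in one place. A minimal hedged sketch of the call-site pattern, mirroring the fused_attention changes below; the helper name SetUpAccumulationNode and the parameter names are made up for illustration, not identifiers from this diff:

#include <memory>

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/utils.h"

// Hypothetical helper showing the post-commit call-site pattern.
static void SetUpAccumulationNode(const paddle::Tensor& out,
                                  egr::AutogradMeta* p_autograd_out) {
  // Before: std::make_shared<egr::GradNodeAccumulation>(p_autograd_out)
  //         followed by an explicit node->SetGradInMeta(out, 0).
  // After: the forward tensor goes straight into the constructor, which
  // captures weak_grad_ and the grad-in TensorMeta (dtype/shape) itself.
  auto accumulation_node = std::make_shared<egr::GradNodeAccumulation>(out);
  egr::EagerUtils::SetOutRankWithSlot(p_autograd_out, 0);
  egr::EagerUtils::SetHistory(p_autograd_out, accumulation_node);
}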

paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc

Lines changed: 3 additions & 0 deletions
@@ -66,6 +66,9 @@ paddle::Tensor add_n_ad_func(const std::vector<paddle::Tensor>& x,
  std::vector<egr::AutogradMeta*> x_autograd_meta_vec =
      egr::EagerUtils::nullable_autograd_meta(x);
  std::vector<egr::AutogradMeta*>* x_autograd_meta = &x_autograd_meta_vec;
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(x);
  // Forward API Call
  std::string unique_api_name;
  if (VLOG_IS_ON(3) || FLAGS_enable_unique_name) {
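
Note: the body of egr::CheckGradNodeAccumulation is not part of the files shown in this diff. As a rough conceptual sketch only, using simplified stand-in types rather than Paddle's real classes, the invariant the comment above describes is that a leaf tensor's accumulation node caches grad-in meta at construction time, and the check trips if the tensor's current dtype has since diverged (e.g. after an in-place cast):

#include <cassert>
#include <memory>

// Toy stand-ins; Paddle's real types are paddle::Tensor, egr::AutogradMeta,
// and egr::GradNodeAccumulation.
enum class DType { kFloat32, kFloat16 };

struct ToyAccumulationNode {
  DType grad_in_dtype;  // meta captured when the node was built
};

struct ToyLeafTensor {
  DType dtype;
  std::shared_ptr<ToyAccumulationNode> accumulation_node;
};

// The consistency check: the dtype recorded in the leaf tensor's accumulation
// node must match the tensor's current dtype, otherwise the accumulated
// gradient would come out with the stale dtype.
void CheckToyAccumulationMeta(const ToyLeafTensor& t) {
  if (!t.accumulation_node) return;  // not a leaf with a grad node yet
  assert(t.accumulation_node->grad_in_dtype == t.dtype &&
         "stale grad-in meta: accumulation node dtype != tensor dtype");
}

int main() {
  ToyLeafTensor w{DType::kFloat32,
                  std::make_shared<ToyAccumulationNode>(
                      ToyAccumulationNode{DType::kFloat32})};
  CheckToyAccumulationMeta(w);  // consistent: passes
  // If w were cast in place to float16 without updating the node's meta,
  // the check above would fire.
  return 0;
}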

paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc

Lines changed: 4 additions & 0 deletions
@@ -113,6 +113,10 @@ paddle::Tensor conv2d_ad_func(
      egr::EagerUtils::nullable_autograd_meta(input);
  egr::AutogradMeta* filter_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(filter);
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(input);
+  egr::CheckGradNodeAccumulation(filter);
  // Forward API Call
  std::string unique_api_name;
  if (VLOG_IS_ON(3) || FLAGS_enable_unique_name) {

paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_from_local_fwd_func.cc

Lines changed: 3 additions & 0 deletions
@@ -46,6 +46,9 @@ paddle::Tensor dtensor_from_local_ad_function(
  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

paddle/fluid/eager/api/manual/eager_manual/forwards/dtensor_to_local_fwd_func.cc

Lines changed: 3 additions & 0 deletions
@@ -48,6 +48,9 @@ paddle::Tensor dtensor_to_local_ad_function(
  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc

Lines changed: 4 additions & 0 deletions
@@ -125,6 +125,10 @@ paddle::Tensor multiply_ad_func(
      egr::EagerUtils::nullable_autograd_meta(x);
  egr::AutogradMeta* y_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(y);
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(x);
+  egr::CheckGradNodeAccumulation(y);

  // Before log info

paddle/fluid/eager/api/manual/eager_manual/forwards/reshard_fwd_func.cc

Lines changed: 3 additions & 0 deletions
@@ -37,6 +37,9 @@ paddle::Tensor reshard_ad_function(
  // Get Input AutoGradMeta
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(input);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, input_autograd_meta);

paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc

Lines changed: 7 additions & 0 deletions
@@ -131,6 +131,13 @@ sync_batch_norm__ad_func(const paddle::Tensor& x,
      egr::EagerUtils::nullable_autograd_meta(scale);
  egr::AutogradMeta* bias_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(bias);
+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with
+  // its TensorMeta
+  egr::CheckGradNodeAccumulation(x);
+  egr::CheckGradNodeAccumulation(mean);
+  egr::CheckGradNodeAccumulation(variance);
+  egr::CheckGradNodeAccumulation(scale);
+  egr::CheckGradNodeAccumulation(bias);

  // Before log info

paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc

Lines changed: 12 additions & 27 deletions
@@ -423,11 +423,10 @@ fused_attention_dygraph_function(
      grad_node->SetGradOutMeta(QKVBias, 4);

      auto QKVBiasOut_accumulation_node =
-          std::make_shared<egr::GradNodeAccumulation>(p_autograd_QKVBiasOut);
+          std::make_shared<egr::GradNodeAccumulation>(QKVBiasOut);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKVBiasOut, 0);
      egr::EagerUtils::SetHistory(p_autograd_QKVBiasOut,
                                  QKVBiasOut_accumulation_node);
-      QKVBiasOut_accumulation_node->SetGradInMeta(QKVBiasOut, 0);
      grad_node->SetGradOutMeta(QKVBiasOut, 11);
    }

@@ -436,11 +435,10 @@ fused_attention_dygraph_function(
      grad_node->SetTensorWrapper_SrcMaskOut(SrcMaskOut);

      auto SrcMaskOut_accumulation_node =
-          std::make_shared<egr::GradNodeAccumulation>(p_autograd_SrcMaskOut);
+          std::make_shared<egr::GradNodeAccumulation>(SrcMaskOut);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_SrcMaskOut, 0);
      egr::EagerUtils::SetHistory(p_autograd_SrcMaskOut,
                                  SrcMaskOut_accumulation_node);
-      SrcMaskOut_accumulation_node->SetGradInMeta(SrcMaskOut, 0);
      grad_node->SetGradOutMeta(SrcMaskOut, 12);
    }

@@ -462,11 +460,10 @@ fused_attention_dygraph_function(
      grad_node->SetTensorWrapper_LnOut(LnOut);

      auto LnOut_accumulation_node =
-          std::make_shared<egr::GradNodeAccumulation>(p_autograd_LnOut);
+          std::make_shared<egr::GradNodeAccumulation>(LnOut);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_LnOut, 0);
      egr::EagerUtils::SetHistory(p_autograd_LnOut,
                                  LnOut_accumulation_node);
-      LnOut_accumulation_node->SetGradInMeta(LnOut, 0);
      grad_node->SetGradOutMeta(LnOut, 13);
    }
    if (LnMean.has_allocation()) {
@@ -490,14 +487,11 @@ fused_attention_dygraph_function(
      grad_node->SetTensorWrapper_Ln2Variance(Ln2Variance);

      auto BiasDropoutResidualOut_accumulation_node =
-          std::make_shared<egr::GradNodeAccumulation>(
-              p_autograd_BiasDropoutResidualOut);
+          std::make_shared<egr::GradNodeAccumulation>(BiasDropoutResidualOut);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_BiasDropoutResidualOut,
                                          0);
      egr::EagerUtils::SetHistory(p_autograd_BiasDropoutResidualOut,
                                  BiasDropoutResidualOut_accumulation_node);
-      BiasDropoutResidualOut_accumulation_node->SetGradInMeta(
-          BiasDropoutResidualOut, 0);
      grad_node->SetGradOutMeta(BiasDropoutResidualOut, 14);
    }

@@ -520,68 +514,59 @@ fused_attention_dygraph_function(
    egr::EagerUtils::SetHistory(p_autograd_Y, grad_node);
    grad_node->SetGradInMeta(Y, 19);
    auto QKVOut_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_QKVOut);
+        std::make_shared<egr::GradNodeAccumulation>(QKVOut);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKVOut, 0);
    egr::EagerUtils::SetHistory(p_autograd_QKVOut, QKVOut_accumulation_node);
-    QKVOut_accumulation_node->SetGradInMeta(QKVOut, 0);
    grad_node->SetGradOutMeta(QKVOut, 15);

    auto QKTVOut_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_QKTVOut);
+        std::make_shared<egr::GradNodeAccumulation>(QKTVOut);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKTVOut, 0);
    egr::EagerUtils::SetHistory(p_autograd_QKTVOut,
                                QKTVOut_accumulation_node);
-    QKTVOut_accumulation_node->SetGradInMeta(QKTVOut, 0);
    grad_node->SetGradOutMeta(QKTVOut, 16);

    auto TransposeOut2_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_TransposeOut2);
+        std::make_shared<egr::GradNodeAccumulation>(TransposeOut2);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_TransposeOut2, 0);
    egr::EagerUtils::SetHistory(p_autograd_TransposeOut2,
                                TransposeOut2_accumulation_node);
-    TransposeOut2_accumulation_node->SetGradInMeta(TransposeOut2, 0);
    grad_node->SetGradOutMeta(TransposeOut2, 17);

    auto QKOut_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_QKOut);
+        std::make_shared<egr::GradNodeAccumulation>(QKOut);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKOut, 0);
    egr::EagerUtils::SetHistory(p_autograd_QKOut, QKOut_accumulation_node);
-    QKOut_accumulation_node->SetGradInMeta(QKOut, 0);
    grad_node->SetGradOutMeta(QKOut, 18);

    auto SoftmaxOut_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_SoftmaxOut);
+        std::make_shared<egr::GradNodeAccumulation>(SoftmaxOut);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_SoftmaxOut, 0);
    egr::EagerUtils::SetHistory(p_autograd_SoftmaxOut,
                                SoftmaxOut_accumulation_node);
-    SoftmaxOut_accumulation_node->SetGradInMeta(SoftmaxOut, 0);
    grad_node->SetGradOutMeta(SoftmaxOut, 19);

    if (AttnDropoutOut.has_allocation()) {
      auto AttnDropoutOut_accumulation_node =
-          std::make_shared<egr::GradNodeAccumulation>(
-              p_autograd_AttnDropoutOut);
+          std::make_shared<egr::GradNodeAccumulation>(AttnDropoutOut);
      egr::EagerUtils::SetOutRankWithSlot(p_autograd_AttnDropoutOut, 0);
      egr::EagerUtils::SetHistory(p_autograd_AttnDropoutOut,
                                  AttnDropoutOut_accumulation_node);
-      AttnDropoutOut_accumulation_node->SetGradInMeta(AttnDropoutOut, 0);
      grad_node->SetGradOutMeta(AttnDropoutOut, 20);
    }

    auto FMHAOut_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_FMHAOut);
+        std::make_shared<egr::GradNodeAccumulation>(FMHAOut);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_FMHAOut, 0);
    egr::EagerUtils::SetHistory(p_autograd_FMHAOut,
                                FMHAOut_accumulation_node);
-    FMHAOut_accumulation_node->SetGradInMeta(FMHAOut, 0);
    grad_node->SetGradOutMeta(FMHAOut, 21);

    auto OutLinearOut_accumulation_node =
-        std::make_shared<egr::GradNodeAccumulation>(p_autograd_OutLinearOut);
+        std::make_shared<egr::GradNodeAccumulation>(OutLinearOut);
    egr::EagerUtils::SetOutRankWithSlot(p_autograd_OutLinearOut, 0);
    egr::EagerUtils::SetHistory(p_autograd_OutLinearOut,
                                OutLinearOut_accumulation_node);
-    OutLinearOut_accumulation_node->SetGradInMeta(OutLinearOut, 0);
    grad_node->SetGradOutMeta(OutLinearOut, 22);
  }

paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

Lines changed: 16 additions & 0 deletions
@@ -534,6 +534,9 @@ class {} : public egr::GradNodeBase {{
  // Get Input AutoGradMeta
{}

+  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with its TensorMeta
+{}
+
  // Before log info
{}

@@ -1913,6 +1916,7 @@ def GenerateForwardDefinitionAndDeclaration(
        else:
            forward_inplace_map = {}
        indent = GetIndent(1)
+        check_input_grad_node_str = ""

        # Get Function Args
        num_inputs = len(forward_attrs_list) + len(forward_inputs_position_map)
@@ -2328,6 +2332,17 @@ def GenerateForwardDefinitionAndDeclaration(
            outputs_autograd_meta_list.append(output_autograd_meta)
        outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list)

+        # 3. Check Input Accumulation Node
+        check_input_grad_node_str_list = []
+        for name, (_, _) in forward_inputs_position_map.items():
+            check_input_grad_node_str_list.append(
+                f"{indent}egr::CheckGradNodeAccumulation({name});"
+            )
+        if check_input_grad_node_str_list:
+            check_input_grad_node_str = "\n".join(
+                check_input_grad_node_str_list
+            )
+
        # Node Creation
        self.GenerateNodeCreationCodes(is_inplaced=is_inplaced)
        node_creation_str = self.node_creation_str
@@ -2595,6 +2610,7 @@ def GenerateForwardDefinitionAndDeclaration(
            type_autocast_logic_str,
            layout_logic_str,
            inputs_autograd_meta_str,
+            check_input_grad_node_str,
            before_log_str,
            compute_require_grad_args_str,
            self.grad_node_name,
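
For illustration, for a hypothetical generated op with forward inputs x and y (names made up for the example, and assuming GetIndent(1) expands to two spaces), the new template slot would fill in roughly the following C++ right after the input AutogradMeta block of the generated forward function:

  // Check LeafTensor if its GradNodeAccumulation TensorMeta is consistent with its TensorMeta
  egr::CheckGradNodeAccumulation(x);
  egr::CheckGradNodeAccumulation(y);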
