
Commit 6de2058

Fix operator type record in profiler [cherry-pick PR44582] (#44654)
* fix record event for operator type in new dygraph (#44582)
* fix new dygraph record event for op
* update unit test
* fix file mode
1 parent b71833e commit 6de2058

6 files changed, 536 insertions(+), 432 deletions(-)

paddle/fluid/eager/auto_code_generator/eager_generator.cc

Lines changed: 228 additions & 120 deletions
Large diffs are not rendered by default.

paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py

Lines changed: 13 additions & 11 deletions
@@ -1,11 +1,11 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-#
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -71,7 +71,7 @@ def FindParsingFunctionFromAttributeType(atype):


 RECORD_EVENT_TEMPLATE = \
-    " paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::Operator, 1);"
+    "paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::UserDefined, 1);"


 RETURN_INPLACE_PYOBJECT_TEMPLATE = \
@@ -253,6 +253,7 @@ def FindParsingFunctionFromAttributeType(atype):
 ## Generator Classes ##
 #######################
 class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
+
     def __init__(self, forward_api_contents, namespace):
         # Members from Parent:
         #self.namespace
@@ -265,7 +266,7 @@ def __init__(self, forward_api_contents, namespace):
         #self.forward_outputs_position_map
         #self.optional_inputs
         #self.no_need_buffers
-        #self.intermediate_outputs
+        #self.intermediate_outputs
         #self.inplace_map
         FunctionGeneratorBase.__init__(self, forward_api_contents, namespace)

@@ -327,16 +328,16 @@ def GeneratePythonCFunction(self):
         set_device_str = FUNCTION_SET_DEVICE_TEMPLATE.format(expected_place_str)

         # Generate Dygraph Function Call Logic
-        num_args = len(forward_inputs_position_map.keys()) + len(
-            orig_forward_attrs_list)
+        num_args = len(
+            forward_inputs_position_map.keys()) + len(orig_forward_attrs_list)
         dygraph_function_call_list = ["" for i in range(num_args)]
         for name, (_, pos) in forward_inputs_position_map.items():
             dygraph_function_call_list[pos] = f"{name}"
         for name, _, _, pos in orig_forward_attrs_list:
             dygraph_function_call_list[pos] = f"{name}"
         dygraph_function_call_str = ",".join(dygraph_function_call_list)

-        # Generate Python-C Function Definitions
+        # Generate Python-C Function Definitions
         if is_forward_only:
             fwd_function_name = FUNCTION_NAME_TEMPLATE.format(
                 "paddle::experimental::", namespace, forward_api_name)
@@ -441,8 +442,9 @@ def run(self):


 class PythonCYamlGenerator(YamlGeneratorBase):
+
     def __init__(self, path):
-        # Parent members:
+        # Parent members:
         # self.namespace
         # self.api_yaml_path
         # self.forward_api_list
@@ -457,8 +459,8 @@ def GeneratePythonCFunctions(self):
         forward_api_list = self.forward_api_list

         for forward_api_content in forward_api_list:
-            f_generator = PythonCSingleFunctionGenerator(forward_api_content,
-                                                         namespace)
+            f_generator = PythonCSingleFunctionGenerator(
+                forward_api_content, namespace)
             status = f_generator.run()

             if status == True:
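
The substantive change in python_c_gen.py is the RECORD_EVENT_TEMPLATE hunk above; the remaining hunks are formatter churn. The template is a Python format string that the generator expands into one C++ statement at the top of each generated Python-C wrapper. As a rough illustration (the guard variable and event name below are placeholders, not taken from the generated sources), the emitted statement now looks like:

    // Hypothetical expansion of RECORD_EVENT_TEMPLATE for an API named "matmul".
    // After this fix the event is tagged UserDefined, so the profiler no longer
    // counts the Python-C wrapper itself as an operator.
    paddle::platform::RecordEvent pythonc_record_event(
        "matmul pybind",  // placeholder; built from the two {} fields of the template
        paddle::platform::TracerEventType::UserDefined,
        1);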

paddle/fluid/eager/backward.cc

Lines changed: 44 additions & 24 deletions
@@ -30,10 +30,10 @@
 namespace egr {

 /*
- * GeneralGrad is Helpper class to implement custom grad operation between
- * outputs and inputs.
- *
- * **/
+ * GeneralGrad is Helpper class to implement custom grad operation between
+ * outputs and inputs.
+ *
+ * **/
 class GeneralGrad {
  public:
  static GeneralGrad& Instance() { return *general_grad_; }
@@ -64,7 +64,8 @@ class GeneralGrad {
           paddle::platform::errors::Fatal(
               "There is no grad op for %s:[%d] or it's"
               "stop_gradient=True.",
-              msg, i));
+              msg,
+              i));
       if (is_no_grad_vars) {
         (no_grad_var_nodes_inputmeta_map)[target_node] = auto_grad_meta;
       } else {  // normal input
@@ -248,7 +249,8 @@ class GeneralGrad {

   std::vector<paddle::experimental::Tensor> GetResults(
       const std::vector<paddle::experimental::Tensor>& inputs,
-      bool allow_unused, bool create_graph) {
+      bool allow_unused,
+      bool create_graph) {
     VLOG(6) << "Running in GetResults";
     if (inputs.empty()) return {};

@@ -276,7 +278,8 @@ class GeneralGrad {
         tensor_auto_grad_meta->SetStopGradient(!create_graph);
         results.emplace_back(iter->second);
       } else {
-        PADDLE_ENFORCE_EQ(allow_unused, true,
+        PADDLE_ENFORCE_EQ(allow_unused,
+                          true,
                           paddle::platform::errors::InvalidArgument(
                               "The %d-th input does not appear in the backward "
                               "graph. Please check the input tensor or set "
@@ -493,7 +496,8 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
 void EnforceGradNodeHasInput(GradNodeBase* node) {
   VLOG(6) << "Running in EnforceGradNodeHasInput";
   PADDLE_ENFORCE_NE(
-      node->IsTensorWrappersCleared(), true,
+      node->IsTensorWrappersCleared(),
+      true,
       paddle::platform::errors::Fatal(
           "The TensorWrappers of %s do not exist. This may be because:\n"
           "You calculate backward twice for the same subgraph without "
@@ -509,10 +513,13 @@ void DuplicateCheck(const std::vector<paddle::experimental::Tensor>& inputs,
   for (auto in : inputs) {
     AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(in);
     PADDLE_ENFORCE_EQ(
-        visisted_ins.count(auto_grad_meta), 0,
+        visisted_ins.count(auto_grad_meta),
+        0,
         paddle::platform::errors::AlreadyExists(
-            "%s contain duplicate tensor %s, please check %s carefully.", msg,
-            in.name(), msg));
+            "%s contain duplicate tensor %s, please check %s carefully.",
+            msg,
+            in.name(),
+            msg));
     visisted_ins.insert(auto_grad_meta);
   }
 }
@@ -522,7 +529,8 @@ GeneralGrad* GeneralGrad::general_grad_ = new GeneralGrad();
 std::vector<paddle::experimental::Tensor> RunBackward(
     const std::vector<paddle::experimental::Tensor>& tensors,  // output
     const std::vector<paddle::experimental::Tensor>& grad_tensors,
-    bool retain_graph, bool create_graph = false,
+    bool retain_graph,
+    bool create_graph = false,
     const std::vector<paddle::experimental::Tensor>& inputs = {},
     bool allow_unused = false,
     const std::vector<paddle::experimental::Tensor>& no_grad_vars = {}) {
@@ -631,8 +639,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(

   if (is_general_grad) {
     // Prepare several vital preprocess for GeneralGrad
-    GeneralGrad::Instance().PreparedForGeneralGrad(inputs, no_grad_vars, &queue,
-                                                   node_input_buffers_dict);
+    GeneralGrad::Instance().PreparedForGeneralGrad(
+        inputs, no_grad_vars, &queue, node_input_buffers_dict);
   }

   VLOG(6) << " startup_ops' size is :" << queue.size();
@@ -651,7 +659,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(

     paddle::platform::RecordEvent node_record_event(
         std::string((*node).name()) + " grad_node",
-        paddle::platform::TracerEventType::Operator, 1);
+        paddle::platform::TracerEventType::Operator,
+        1);

     if (queue.size() > 1 && node_in_degree_map[node] != 0) {
       queue.pop();
@@ -716,7 +725,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
             "Number of edges should be either empty ( for leaf node "
             ") or the same as number of output grad tensors, but we "
             "got edges size is: %d, grad_output size is: %d",
-            edges.size(), grad_output_tensors.size()));
+            edges.size(),
+            grad_output_tensors.size()));

     for (size_t i = 0; i < edges.size(); i++) {
       for (size_t j = 0; j < edges[i].size(); j++) {
@@ -739,7 +749,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
         }

         PADDLE_ENFORCE_LT(
-            j, grad_output_tensors[i].size(),
+            j,
+            grad_output_tensors[i].size(),
             paddle::platform::errors::Fatal(
                 "Rank of grad_output_tensors should be less than "
                 "grad_output_tensors[i].size(), which is: %d. This error may "
@@ -771,9 +782,10 @@ std::vector<paddle::experimental::Tensor> RunBackward(
         VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
                 << ", rank: " << edge_rank.second;

-        node_input_buffers_dict[next_node]->add(
-            edge_rank.first, edge_rank.second, grad_output_tensor,
-            create_graph);
+        node_input_buffers_dict[next_node]->add(edge_rank.first,
+                                                edge_rank.second,
+                                                grad_output_tensor,
+                                                create_graph);

         // Update queue
         node_in_degree_map[next_node]--;
@@ -810,7 +822,7 @@ void Backward(
     bool retain_graph) {
   VLOG(6) << "Run in Backward";
   paddle::platform::RecordEvent backward_record_event(
-      "backward", paddle::platform::TracerEventType::Operator, 1);
+      "backward", paddle::platform::TracerEventType::UserDefined, 1);
   RunBackward(tensors, grad_tensors, retain_graph);
   phi::autotune::AutoTuneStatus::Instance().Update();
 }
@@ -819,14 +831,22 @@ std::vector<paddle::experimental::Tensor> Grad(
     const std::vector<paddle::experimental::Tensor>& tensors,  // outputs
     const std::vector<paddle::experimental::Tensor>& inputs,
     const std::vector<paddle::experimental::Tensor>& grad_tensors,
-    bool retain_graph, bool create_graph, bool only_inputs, bool allow_unused,
+    bool retain_graph,
+    bool create_graph,
+    bool only_inputs,
+    bool allow_unused,
     const std::vector<paddle::experimental::Tensor>& no_grad_vars) {
   VLOG(6) << "Run in Grad";

   DuplicateCheck(inputs, true /* is_input */);
   DuplicateCheck(tensors, false /* is_input */);

-  return RunBackward(tensors, grad_tensors, retain_graph, create_graph, inputs,
-                     allow_unused, no_grad_vars);
+  return RunBackward(tensors,
+                     grad_tensors,
+                     retain_graph,
+                     create_graph,
+                     inputs,
+                     allow_unused,
+                     no_grad_vars);
 }
 }  // namespace egr
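
Note the split of event types that the two RecordEvent hunks in backward.cc establish: the guard around each grad node's execution keeps TracerEventType::Operator, since it corresponds to an actual operator's backward computation, while the umbrella guard around the whole backward pass is retagged as UserDefined. Condensed from the diff above:

    // Per grad node: a real operator's backward run, so it stays Operator.
    paddle::platform::RecordEvent node_record_event(
        std::string((*node).name()) + " grad_node",
        paddle::platform::TracerEventType::Operator,
        1);

    // Whole backward pass: framework-level bookkeeping, now tagged UserDefined.
    paddle::platform::RecordEvent backward_record_event(
        "backward", paddle::platform::TracerEventType::UserDefined, 1);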

paddle/fluid/platform/profiler/chrometracing_logger.cc

Lines changed: 2 additions & 2 deletions
@@ -588,7 +588,7 @@ void ChromeTracingLogger::StartLog() {
       std::string(
           R"JSON(
   {
-    "id": %d, "name": "%s", "totalGlobalMem": %u,
+    "id": %d, "name": "%s", "totalGlobalMem": %llu,
     "computeMajor": %d, "computeMinor": %d,
     "maxThreadsPerBlock": %d, "maxThreadsPerMultiprocessor": %d,
     "regsPerBlock": %d, "regsPerMultiprocessor": %d, "warpSize": %d,
@@ -618,7 +618,7 @@ void ChromeTracingLogger::StartLog() {
       std::string(
           R"JSON(
   {
-    "id": %d, "name": "%s", "totalGlobalMem": %u,
+    "id": %d, "name": "%s", "totalGlobalMem": %llu,
     "computeMajor": %d, "computeMinor": %d,
     "maxThreadsPerBlock": %d, "maxThreadsPerMultiprocessor": %d,
     "regsPerBlock": %d, "regsPerMultiprocessor": %d, "warpSize": %d,
