Commit 69887cd

Add LOG Guard and optimize the PyLayer LOG (PaddlePaddle#76010)
* vlog guard
* pylayer record forward stack
* pylayer vlog opt
1 parent f29f693 commit 69887cd

File tree: 11 files changed, +139 -30 lines

paddle/fluid/eager/pylayer/py_layer_node.cc

Lines changed: 3 additions & 4 deletions
@@ -46,6 +46,9 @@ GradNodePyLayer::operator()(
   }
   pybind11::gil_scoped_acquire gil;
   VLOG(3) << "Running Eager Backward Node: " << name();
+  if (FLAGS_call_stack_level == 3) {
+    VLOG(3) << "PyLayer forward call stack: " << this->GetForwardTrace();
+  }

   paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
       hooked_grads = GradNodePyLayer::ApplyGradientHooks(grads);
@@ -172,10 +175,6 @@ GradNodePyLayer::operator()(
         common::errors::External(pybind11::detail::error_string().c_str()));
   }

-  if (FLAGS_call_stack_level == 3) {
-    this->SetForwardTrace(egr::Controller::Instance().GetPythonStack());
-  }
-
   VLOG(6) << "PyLayer backward function finish...";

   PyObject* outputs_tuple = nullptr;

paddle/fluid/eager/pylayer/py_layer_node.h

Lines changed: 7 additions & 1 deletion
@@ -32,7 +32,13 @@ class GradNodePyLayer : public GradNodeBase {
                    size_t bwd_out_slot_num)
       : GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {
     ctx_ = ctx;
-    name_ = "GradNodePyLayer_" + std::string(Py_TYPE(ctx_)->tp_name);
+    std::string str = std::string(Py_TYPE(ctx_)->tp_name);
+    std::string suffix = "_backward";
+    if (str.size() >= suffix.size() &&
+        str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0) {
+      str.erase(str.size() - suffix.size(), suffix.size());
+    }
+    name_ = "GradNodePyLayer_" + str;
     Py_INCREF(ctx_);
   }

paddle/fluid/pybind/eager_py_layer.cc

Lines changed: 25 additions & 9 deletions
@@ -39,11 +39,11 @@ limitations under the License. */
 #pragma GCC diagnostic ignored "-Wwrite-strings"
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 COMMON_DECLARE_bool(check_cuda_error);
-
-using egr::ConvertToDistTensor;
-
+COMMON_DECLARE_bool(check_nan_inf);
+COMMON_DECLARE_int32(call_stack_level);
 COMMON_DECLARE_int64(offload_retry_times);

+using egr::ConvertToDistTensor;
 namespace paddle::pybind {

 PyTypeObject* p_pylayer_type;
@@ -192,7 +192,11 @@ PyObject* pylayer_method_apply(PyObject* cls,
                                PyObject* kwargs) {
   EAGER_TRY
   SetPythonStack();
-  VLOG(6) << "Begin run PyLayer apply...";
+  std::string classname =
+      std::string(reinterpret_cast<PyTypeObject*>(cls)->tp_name);
+  VLOG(3) << classname << ":Running PyLayer Apply ";
+  VLOG(4) << classname << ":"
+          << "Construct PyLayerContext";
   PyObject* backward_function =
       PyObject_GetAttrString(cls, "_backward_function");
   if (!backward_function) {
@@ -230,7 +234,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
     forward_args = PyTuple_New(args_size + 1);  // NOLINT
     Py_INCREF(ctx);
     PyTuple_SET_ITEM(forward_args, 0, reinterpret_cast<PyObject*>(ctx));
-
+    VLOG(6) << classname << ":Prepare Pylayer forward args ";
+    VLOG(6) << classname << ":Input size is " << inputs_size;
     std::vector<std::vector<egr::AutogradMeta*>> inputs_autograd_meta;
     inputs_autograd_meta.reserve(inputs_size);
     std::vector<std::vector<paddle::Tensor*>> inputs_tensor;
@@ -374,6 +379,7 @@ PyObject* pylayer_method_apply(PyObject* cls,
   }

   VLOG(6)
+      << classname << ":"
       << "PyLayer forward args is ready, begin call user's forward function...";
   // call forward
   auto forward_fn = PyObject_GetAttrString(cls, "forward");
@@ -502,7 +508,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
     PADDLE_THROW(common::errors::InvalidArgument(
         "At least one output of `PyLayer.forward` is a `Tensor`."));
   }
-  VLOG(6) << "PyLayer forward function finish...";
+  VLOG(6) << classname << ":"
+          << "PyLayer forward function finish...";

 #ifdef PADDLE_WITH_CUDA
   bool has_grad = false;
@@ -539,8 +546,13 @@ PyObject* pylayer_method_apply(PyObject* cls,
         std::make_shared<egr::GradNodePyLayer>(reinterpret_cast<PyObject*>(ctx),
                                                outputs_autograd_meta.size(),
                                                inputs_autograd_meta.size());
-    VLOG(3) << "Create grad node " << grad_node->name() << " addr "
+    VLOG(3) << classname << ":"
+            << "Create grad node " << grad_node->name() << " addr "
             << grad_node;
+    // For dump call stack
+    if (FLAGS_check_nan_inf || FLAGS_call_stack_level == 3) {
+      grad_node->SetForwardTrace(egr::Controller::Instance().GetPythonStack());
+    }

 #ifdef PADDLE_WITH_CUDA
     has_grad = true;
@@ -575,7 +587,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
       grad_node->SetGradInMeta(*outputs_tensor[i][0], i);
     }
   }
-  VLOG(6) << "PyLayer construct backward node finish...";
+  VLOG(6) << classname << ":"
+          << "PyLayer construct backward node finish...";
 }

 if (outputs_size == 1) {
@@ -586,6 +599,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
       Py_XDECREF(outputs_tuple);
     }
   }
+  VLOG(3) << classname << ":"
+          << "PyLayer output size " << outputs_size;

   if (PyList_Check(outputs)) {
     Py_XDECREF(outputs_tuple);
@@ -610,7 +625,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
     egr::CUDAErrorCheck("pylayer_method_apply " +
                         std::string(Py_TYPE(ctx)->tp_name) + " finish");
   }
-
+  VLOG(3) << classname << ":"
+          << "Finish PyLayer Apply";
   return outputs;
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
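
Note: taken together with the py_layer_node.cc hunk above, apply() now snapshots the forward-time Python stack onto the grad node, and the grad node replays it when backward runs. A minimal sketch of exercising that path from Python (not part of this commit; it assumes FLAGS_call_stack_level accepts the value 3 tested above, and the message is only visible when VLOG verbosity is at least 3, e.g. GLOG_v=3 or the vlog_guard added later in this commit):

import paddle
from paddle.autograd import PyLayer

class Tanh(PyLayer):
    @staticmethod
    def forward(ctx, x):
        y = paddle.tanh(x)
        ctx.save_for_backward(y)
        return y

    @staticmethod
    def backward(ctx, dy):
        (y,) = ctx.saved_tensor()
        return dy * (1 - paddle.square(y))

# Ask pylayer_method_apply to record the forward Python stack on the grad node.
paddle.set_flags({"FLAGS_call_stack_level": 3})

x = paddle.randn([4, 4])
x.stop_gradient = False
y = Tanh.apply(x)
# During backward, GradNodePyLayer logs "PyLayer forward call stack: ..." at VLOG(3).
y.sum().backward()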

paddle/fluid/pybind/eager_utils.cc

Lines changed: 21 additions & 16 deletions
@@ -368,31 +368,36 @@ std::shared_ptr<imperative::VarBase> CastPyArg2VarBase(PyObject* obj,
   return py::cast<std::shared_ptr<imperative::VarBase>>(obj);
 }

+/**
+ * @brief Get the string representation of the current Python stack
+ *
+ * Use Python's traceback module to obtain the current stack information and
+ * convert it into a string representation for return.
+ *
+ * @return String representation of the current Python stack
+ */
+std::string GetPythonStack() {
+  pybind11::gil_scoped_acquire gil;
+  PyObject* mod = PyImport_ImportModule("traceback");
+  PyObject* traceback_list = PyObject_CallMethod(mod, "format_stack", "");
+  std::string str = "";
+  for (Py_ssize_t i = 0; i < PyList_Size(traceback_list); i++) {
+    PyObject* line = PyList_GetItem(traceback_list, i);
+    str += py::str(PyUnicode_AsUTF8(line));
+  }
+  return str;
+}
 void SetPythonStack() {
   if (FLAGS_check_nan_inf && FLAGS_check_nan_inf_level == 0) {
     VLOG(4) << "this is SetPythonStack";
-    pybind11::gil_scoped_acquire gil;
-    PyObject* mod = PyImport_ImportModule("traceback");
-    PyObject* traceback_list = PyObject_CallMethod(mod, "format_stack", "");
-    std::string str = "";
-    for (Py_ssize_t i = 0; i < PyList_Size(traceback_list); i++) {
-      PyObject* line = PyList_GetItem(traceback_list, i);
-      str += py::str(PyUnicode_AsUTF8(line));
-    }
+    std::string str = GetPythonStack();
     std::string last = str + egr::Controller::Instance().GetPythonStack();
     egr::Controller::Instance().SetPythonStack(last);
   }

   if (FLAGS_call_stack_level == 3) {
     VLOG(6) << "this is SetPythonStack";
-    pybind11::gil_scoped_acquire gil;
-    PyObject* mod = PyImport_ImportModule("traceback");
-    PyObject* traceback_list = PyObject_CallMethod(mod, "format_stack", "");
-    std::string str = "";
-    for (Py_ssize_t i = 0; i < PyList_Size(traceback_list); i++) {
-      PyObject* line = PyList_GetItem(traceback_list, i);
-      str += py::str(PyUnicode_AsUTF8(line));
-    }
+    std::string str = GetPythonStack();
     egr::Controller::Instance().SetPythonStack(str);
   }
 }
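
For reference, the new GetPythonStack() helper is a thin wrapper around Python's traceback module; a rough Python-side equivalent (a sketch, not part of this commit) is:

import traceback

def get_python_stack() -> str:
    # traceback.format_stack() yields one preformatted string per frame;
    # the C++ helper above concatenates them into a single string.
    return "".join(traceback.format_stack())

print(get_python_stack())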

paddle/fluid/pybind/eager_utils.h

Lines changed: 1 addition & 0 deletions
@@ -121,6 +121,7 @@ std::vector<std::string> CastPyArg2VectorOfString(PyObject* obj,
 std::shared_ptr<jit::Function> CastPyArg2JitFunction(PyObject* obj,
                                                       ssize_t arg_pos);
 void SetPythonStack();
+std::string GetPythonStack();

 PyObject* ToPyObject(int value);
 PyObject* ToPyObject(uint32_t value);

paddle/fluid/pybind/pybind.cc

Lines changed: 4 additions & 0 deletions
@@ -85,6 +85,7 @@ limitations under the License. */
 #include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/common/int_array.h"
+#include "paddle/phi/common/logging_utils.h"
 #include "paddle/phi/core/framework/reader.h"
 #include "paddle/phi/core/memory/allocation/allocator_strategy.h"
 #include "paddle/phi/core/raw_tensor.h"
@@ -3322,6 +3323,7 @@ All parameter, weight, gradient are variables in Paddle.
           // It may cause configuration effects for a single module
           VLOG(3) << "Set the VLOG level of all modules to " << level;
           FLAGS_v = level;
+          phi::set_phi_vlog_level(level);
         } else if (py::isinstance<py::dict>(module_levels)) {
           auto module_levels_dict = module_levels.cast<py::dict>();
           for (auto &item : module_levels_dict) {
@@ -3330,8 +3332,10 @@ All parameter, weight, gradient are variables in Paddle.
             if (module_name == "*") {
               VLOG(3) << "Set the VLOG level of all modules to " << level;
               FLAGS_v = level;
+              phi::set_phi_vlog_level(level);
             } else {
               google::SetVLOGLevel(module_name.c_str(), level);
+              phi::set_phi_vlog_level(module_name.c_str(), level);
             }
           }
         } else {
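
The binding extended above is the one framework.py calls as paddle.base.core.set_vlog_level (see the last file in this commit); after this change the phi-side VLOG level follows the same setting. A usage sketch, with hypothetical module names (glog module patterns are source-file basenames):

import paddle

paddle.base.core.set_vlog_level(3)                      # every module to VLOG level 3
paddle.base.core.set_vlog_level({"eager_py_layer": 4})  # one module only (hypothetical name)
paddle.base.core.set_vlog_level({"*": 1})               # "*" also means all modules
paddle.base.core.set_vlog_level(0)                      # back to quiet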

paddle/phi/common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -5,4 +5,5 @@ collect_srcs(
   scalar.cc
   int_array.cc
   memory_utils.cc
+  logging_utils.cc
   port.cc)

paddle/phi/common/logging_utils.cc

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/common/logging_utils.h"
+
+#include <glog/logging.h>
+#include <iostream>
+namespace phi {
+PADDLE_API void set_phi_vlog_level(int level) { FLAGS_v = level; }
+PADDLE_API void set_phi_vlog_level(const char* module_pattern, int level) {
+  google::SetVLOGLevel(module_pattern, level);
+}
+}  // namespace phi

paddle/phi/common/logging_utils.h

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/common/macros.h"
+namespace phi {
+PADDLE_API void set_phi_vlog_level(int level);
+PADDLE_API void set_phi_vlog_level(const char* module_pattern, int level);
+}  // namespace phi

python/paddle/base/framework.py

Lines changed: 17 additions & 0 deletions
@@ -8606,3 +8606,20 @@ def pir_op_name_guard(op_name: str) -> Generator[None, None, None]:
     finally:
         if paddle.framework.in_pir_mode() and core._is_bwd_prim_enabled():
             pir.set_comp_op_name(original_comp_op_name)
+
+
+@signature_safe_contextmanager
+def vlog_guard(module_levels: int | dict) -> Generator[None, None, None]:
+    if not isinstance(module_levels, (int, dict)):
+        raise TypeError(
+            f"The input of vlog_guard must be int or dict but got {type(module_levels).__name__}"
+        )
+    paddle.base.core.set_vlog_level(module_levels)
+    try:
+        yield
+    finally:
+        # Reset the verbose log level to 0
+        if isinstance(module_levels, int):
+            paddle.base.core.set_vlog_level(0)
+        elif isinstance(module_levels, dict):
+            paddle.base.core.set_vlog_level(dict.fromkeys(module_levels, 0))
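
A usage sketch for the new guard (assuming it is called from its definition site, python/paddle/base/framework.py; this commit does not show a shorter public alias):

import paddle
from paddle.base.framework import vlog_guard

# Raise all modules to VLOG level 3 inside the block; levels reset to 0 on exit.
with vlog_guard(3):
    y = paddle.tanh(paddle.randn([2, 2]))

# Per-module levels; "pybind" is a hypothetical module name.
with vlog_guard({"pybind": 4}):
    z = paddle.nn.functional.relu(y)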
