@@ -39,11 +39,11 @@ limitations under the License. */
 #pragma GCC diagnostic ignored "-Wwrite-strings"
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 COMMON_DECLARE_bool(check_cuda_error);
-
-using egr::ConvertToDistTensor;
-
+COMMON_DECLARE_bool(check_nan_inf);
+COMMON_DECLARE_int32(call_stack_level);
 COMMON_DECLARE_int64(offload_retry_times);
 
+using egr::ConvertToDistTensor;
 namespace paddle::pybind {
 
 PyTypeObject* p_pylayer_type;
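Note: the two new `COMMON_DECLARE_*` lines only declare the flags gflags-style (roughly `extern bool FLAGS_check_nan_inf;` and `extern int32_t FLAGS_call_stack_level;`, assuming the usual expansion of Paddle's common flag macros) so this translation unit can read them; both are consumed in the stack-capture branch added further down.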
@@ -192,7 +192,11 @@ PyObject* pylayer_method_apply(PyObject* cls,
                                PyObject* kwargs) {
   EAGER_TRY
   SetPythonStack();
-  VLOG(6) << "Begin run PyLayer apply...";
+  std::string classname =
+      std::string(reinterpret_cast<PyTypeObject*>(cls)->tp_name);
+  VLOG(3) << classname << ":Running PyLayer Apply";
+  VLOG(4) << classname << ":"
+          << "Construct PyLayerContext";
   PyObject* backward_function =
       PyObject_GetAttrString(cls, "_backward_function");
   if (!backward_function) {
@@ -230,7 +234,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
   forward_args = PyTuple_New(args_size + 1);  // NOLINT
   Py_INCREF(ctx);
   PyTuple_SET_ITEM(forward_args, 0, reinterpret_cast<PyObject*>(ctx));
-
+  VLOG(6) << classname << ":Prepare PyLayer forward args";
+  VLOG(6) << classname << ":Input size is " << inputs_size;
   std::vector<std::vector<egr::AutogradMeta*>> inputs_autograd_meta;
   inputs_autograd_meta.reserve(inputs_size);
   std::vector<std::vector<paddle::Tensor*>> inputs_tensor;
@@ -374,6 +379,7 @@ PyObject* pylayer_method_apply(PyObject* cls,
   }
 
   VLOG(6)
+      << classname << ":"
       << "PyLayer forward args is ready, begin call user's forward function...";
   // call forward
   auto forward_fn = PyObject_GetAttrString(cls, "forward");
@@ -502,7 +508,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
     PADDLE_THROW(common::errors::InvalidArgument(
         "At least one output of `PyLayer.forward` is a `Tensor`."));
   }
-  VLOG(6) << "PyLayer forward function finish...";
+  VLOG(6) << classname << ":"
+          << "PyLayer forward function finish...";
 
 #ifdef PADDLE_WITH_CUDA
   bool has_grad = false;
@@ -539,8 +546,13 @@ PyObject* pylayer_method_apply(PyObject* cls,
         std::make_shared<egr::GradNodePyLayer>(reinterpret_cast<PyObject*>(ctx),
                                                outputs_autograd_meta.size(),
                                                inputs_autograd_meta.size());
-    VLOG(3) << "Create grad node " << grad_node->name() << " addr "
+    VLOG(3) << classname << ":"
+            << "Create grad node " << grad_node->name() << " addr "
             << grad_node;
+    // Record the Python call stack of forward for later dumping
+    if (FLAGS_check_nan_inf || FLAGS_call_stack_level == 3) {
+      grad_node->SetForwardTrace(egr::Controller::Instance().GetPythonStack());
+    }
 
 #ifdef PADDLE_WITH_CUDA
     has_grad = true;
@@ -575,7 +587,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
         grad_node->SetGradInMeta(*outputs_tensor[i][0], i);
       }
     }
-    VLOG(6) << "PyLayer construct backward node finish...";
+    VLOG(6) << classname << ":"
+            << "PyLayer construct backward node finish...";
   }
 
   if (outputs_size == 1) {
@@ -586,6 +599,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
       Py_XDECREF(outputs_tuple);
     }
   }
+  VLOG(3) << classname << ":"
+          << "PyLayer output size " << outputs_size;
 
   if (PyList_Check(outputs)) {
     Py_XDECREF(outputs_tuple);
@@ -610,7 +625,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
     egr::CUDAErrorCheck("pylayer_method_apply " +
                         std::string(Py_TYPE(ctx)->tp_name) + " finish");
   }
-
+  VLOG(3) << classname << ":"
+          << "Finish PyLayer Apply";
   return outputs;
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
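For reference, a minimal sketch of how the changed code path is exercised from Python, using Paddle's public `paddle.autograd.PyLayer` API (the `Tanh` class, shapes, and values are illustrative, not part of this PR):

```python
# Sketch: Tanh.apply(x) routes through pylayer_method_apply in the diff above.
import paddle
from paddle.autograd import PyLayer

class Tanh(PyLayer):
    @staticmethod
    def forward(ctx, x):
        y = paddle.tanh(x)
        ctx.save_for_backward(y)  # stashed on the PyLayerContext built by apply()
        return y

    @staticmethod
    def backward(ctx, dy):
        (y,) = ctx.saved_tensor()
        return dy * (1 - y * y)  # d/dx tanh(x) = 1 - tanh(x)^2

x = paddle.randn([3])
x.stop_gradient = False  # make x require grad so a GradNodePyLayer is created
y = Tanh.apply(x)
y.sum().backward()
```

Running this with `GLOG_v=3` or higher should now emit the classname-prefixed log lines added above (e.g. `Tanh:Running PyLayer Apply`). Additionally setting `FLAGS_check_nan_inf=1` or `FLAGS_call_stack_level=3` makes the new branch store the captured Python forward stack on the `GradNodePyLayer` via `SetForwardTrace`, so the forward call site can be dumped if backward later reports an error.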