Commit 0079e0b

[Cherry-Pick] Fix the segfault when using to_tensor in PyLayer. (#33303) (#33518)
Fix a segfault triggered when PyLayer returns the result of to_tensor. Cause: if the stop_gradient attribute is modified on the Python side, InnerSetOverridedStopGradient on the C++ side can no longer change it, so the C++ side now calls SetOverridedStopGradient to modify stop_gradient. In addition, the grad var of a tensor produced by to_tensor has the default DataType (-1), and a grad var's DataType must not remain the default (-1) during backward, so ForwardDataType is used to set the grad var's DataType. Original PR: #33303
1 parent f703461 commit 0079e0b
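
For context, a minimal Python sketch of the scenario this commit fixes, adapted from the test_return_to_tensor case added below (variable names and shapes are illustrative): a PyLayer.forward that returns a tensor freshly created with paddle.to_tensor used to crash in backward, because that tensor's grad var kept the default DataType (-1).

import paddle
from paddle.autograd import PyLayer

class Tanh(PyLayer):
    @staticmethod
    def forward(ctx, x1):
        y1 = paddle.tanh(x1)
        ctx.save_for_backward(y1)
        # Tensor created inside forward; before this fix its grad var
        # had no forward DataType, which segfaulted during backward.
        tensor_1 = paddle.to_tensor([1, 2], dtype='float32')
        return y1, tensor_1

    @staticmethod
    def backward(ctx, dy1, dy2):
        y1, = ctx.saved_tensor()
        return dy1 * (1 - paddle.square(y1))

input1 = paddle.randn([2, 3]).astype("float32")
input1.stop_gradient = False
z, tensor1 = Tanh.apply(x1=input1)
z.mean().backward()  # segfaulted before this commit; succeeds after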

File tree

3 files changed: +202 -35 lines

paddle/fluid/imperative/py_layer_fwd.h

Lines changed: 44 additions & 23 deletions
@@ -17,6 +17,7 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/imperative/layer.h"
+#include "paddle/fluid/imperative/prepared_operator.h"
 #include "paddle/fluid/imperative/tracer.h"
 
 #include "paddle/fluid/framework/op_registry.h"
@@ -32,7 +33,17 @@ bool RequiredGrad(const NameVarBaseMap& ins, const NameVarBaseMap& outs) {
   for (const auto& name_pair : ins) {
     for (const auto& var_base : name_pair.second) {
       if (!var_base->OverridedStopGradient()) {
-        PassStopGradient(outs, var_base->OverridedStopGradient());
+        for (const auto& pair : outs) {
+          for (const auto& var : pair.second) {
+            if (var) {
+              var->SetOverridedStopGradient(false);
+              SetForwardDataTypeOfGradVar(var);
+              VLOG(3) << "Set output: " << var->Name()
+                      << "'s OverridedStopGradient as "
+                      << var->OverridedStopGradient();
+            }
+          }
+        }
         return true;
       }
     }
@@ -78,28 +89,36 @@ py::object PyLayerApply(const platform::Place& place, const py::handle& cls,
   // process args,`input_vars` only collect `imperative::VarBase`
   if (!args.empty()) {
     for (auto ptr = args.begin(); ptr != args.end(); ptr++) {
-      try {
-        if (Py_None != ptr->ptr()) {
+      // Only collect Tensor type in 'args' and pass them to backward. Ignore
+      // other types of input temporarily.
+      if (py::isinstance<imperative::VarBase>(*ptr)) {
+        try {
           auto a = ptr->cast<std::shared_ptr<VarBase>>();
           input_vars.push_back(a);
+        } catch (py::cast_error& err) {
+          PADDLE_THROW(platform::errors::InvalidArgument(
+              "The `PyLayer.forward` function contains invalid argument, the "
+              "`%s` type argument can not be cast into `Tensor`.",
+              ptr->ptr()->ob_type->tp_name));
         }
-      } catch (py::cast_error& err) {
-        // Only collect Tensor type in 'args' and pass them to backward. Ignore
-        // other types of input temporarily.
      }
    }
  }
  // process kwargs, only collect `imperative::VarBase`
  if (!kwargs.empty()) {
    for (auto ptr = kwargs.begin(); ptr != kwargs.end(); ptr++) {
-      try {
-        if (Py_None != ptr->second.ptr()) {
+      // Only collect Tensor type in 'kwargs' and pass them to backward.
+      // Ignore other types of input temporarily.
+      if (py::isinstance<imperative::VarBase>(*ptr->second)) {
+        try {
          auto a = ptr->second.cast<std::shared_ptr<VarBase>>();
          input_vars.push_back(a);
+        } catch (py::cast_error&) {
+          PADDLE_THROW(platform::errors::InvalidArgument(
+              "The `PyLayer.forward` function contains invalid argument, the "
+              "`%s` type argument can not be cast into `Tensor`.",
+              ptr->second.ptr()->ob_type->tp_name));
        }
-      } catch (py::cast_error&) {
-        // Only collect Tensor type in 'kwargs' and pass them to backward.
-        // Ignore other types of input temporarily.
      }
    }
  }
@@ -110,33 +129,35 @@ py::object PyLayerApply(const platform::Place& place, const py::handle& cls,
       PyList_Check(result_forward.ptr())) {
     auto tuple_result = result_forward.cast<py::tuple>();
     for (size_t i = 0; i < tuple_result.size(); i++) {
-      if (Py_None != tuple_result[i].ptr()) {
+      // Only collect Tensor type of output and pass them to backward.
+      // Ignore other types of input temporarily.
+      if (py::isinstance<imperative::VarBase>(tuple_result[i])) {
         try {
           auto temp_out =
               tuple_result[i].cast<std::shared_ptr<imperative::VarBase>>();
           output_vars.push_back(temp_out);
         } catch (py::cast_error&) {
-          // Only collect Tensor type in 'kwargs' and pass them to backward.
-          // Ignore other types of input temporarily.
+          PADDLE_THROW(platform::errors::InvalidArgument(
+              "The `PyLayer.forward` function returns invalid argument, the "
+              "`%s` type argument can not be cast into `Tensor`.",
+              tuple_result[i].ptr()->ob_type->tp_name));
         }
-      } else {
-        // Only collect Tensor type in 'kwargs' and pass them to backward.
-        // Ignore other types of input temporarily.
      }
    }
  } else {
-    if (Py_None != result_forward.ptr()) {
+    // Only collect Tensor type of output and pass them to backward.
+    // Ignore other types of input temporarily.
+    if (py::isinstance<imperative::VarBase>(result_forward)) {
      try {
        auto temp_out =
            result_forward.cast<std::shared_ptr<imperative::VarBase>>();
        output_vars.push_back(temp_out);
      } catch (py::cast_error&) {
-        // Only collect Tensor type in 'kwargs' and pass them to backward.
-        // Ignore other types of input temporarily.
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The `PyLayer.forward` function returns invalid argument, the `%s` "
+            "type argument can not be cast into `Tensor`.",
+            result_forward.ptr()->ob_type->tp_name));
      }
-    } else {
-      // Only collect Tensor type in 'kwargs' and pass them to backward.
-      // Ignore other types of input temporarily.
    }
  }
  if (output_vars.size() == 0) {
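
Note on the RequiredGrad change above: once any input has stop_gradient=False, every Tensor output now gets SetOverridedStopGradient(false) plus a forward DataType for its grad var, instead of going through PassStopGradient, whose InnerSetOverridedStopGradient cannot override a stop_gradient already set from the Python side. A sketch of the observable effect, reusing the hypothetical Tanh layer from the snippet near the top:

x = paddle.randn([2, 3]).astype("float32")
x.stop_gradient = False
z, tensor1 = Tanh.apply(x1=x)
# Every Tensor output should now take part in autograd, including tensor1,
# which paddle.to_tensor created with stop_gradient=True by default.
assert not z.stop_gradient
assert not tensor1.stop_gradient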

paddle/fluid/operators/py_layer_op.cc

Lines changed: 30 additions & 12 deletions
@@ -62,13 +62,22 @@ void RunPyObject(py::object *py_object,
   for (size_t i = 0; i < result_tuple.size(); i++) {
     if ((*outs)[i] != nullptr) {
       if (Py_None != result_tuple[i].ptr()) {
-        try {
-          auto result_var =
-              result_tuple[i].cast<std::shared_ptr<imperative::VarBase>>();
-          *(*outs)[i] = result_var->Var();
-        } catch (py::cast_error &) {
+        if (py::isinstance<imperative::VarBase>(result_tuple[i])) {
+          try {
+            auto result_var =
+                result_tuple[i].cast<std::shared_ptr<imperative::VarBase>>();
+            *(*outs)[i] = result_var->Var();
+          } catch (py::cast_error &) {
+            PADDLE_THROW(platform::errors::InvalidArgument(
+                "The `PyLayer.backward` function returns invalid argument, "
+                "the `%s` type argument can not be cast into `Tensor`.",
+                result_tuple[i].ptr()->ob_type->tp_name));
+          }
+        } else {
           PADDLE_THROW(platform::errors::InvalidArgument(
-              "The output of `PyLayer.backward` should be `Tensor`."));
+              "The output of `PyLayer.backward` should be `Tensor`, but "
+              "received `%s`.",
+              result_tuple[i].ptr()->ob_type->tp_name));
         }
       } else {
         PADDLE_THROW(platform::errors::InvalidArgument(
@@ -94,13 +103,22 @@ void RunPyObject(py::object *py_object,
     }
     if ((*outs)[0] != nullptr) {
       if (Py_None != py_result.ptr()) {
-        try {
-          auto result_var =
-              py_result.cast<std::shared_ptr<imperative::VarBase>>();
-          *((*outs)[0]) = result_var->Var();
-        } catch (py::cast_error &) {
+        if (py::isinstance<imperative::VarBase>(py_result)) {
+          try {
+            auto result_var =
+                py_result.cast<std::shared_ptr<imperative::VarBase>>();
+            *((*outs)[0]) = result_var->Var();
+          } catch (py::cast_error &) {
+            PADDLE_THROW(platform::errors::InvalidArgument(
+                "The `PyLayer.backward` function returns invalid argument, the "
+                "`%s` type argument can not be cast into `Tensor`.",
+                py_result.ptr()->ob_type->tp_name));
+          }
+        } else {
           PADDLE_THROW(platform::errors::InvalidArgument(
-              "The output of `PyLayer.backward` should be `Tensor`."));
+              "The output of `PyLayer.backward` should be `Tensor`, but "
+              "received `%s`",
+              py_result.ptr()->ob_type->tp_name));
         }
       } else {
         PADDLE_THROW(platform::errors::InvalidArgument(
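
The py_layer_op.cc changes above replace a silent bad cast with explicit InvalidArgument errors when PyLayer.backward returns something that is not a Tensor. A small sketch of what the new check surfaces (the BadGrad class name is made up for illustration; the error text is paraphrased from the diff):

import paddle
from paddle.autograd import PyLayer

class BadGrad(PyLayer):
    @staticmethod
    def forward(ctx, x):
        return x + 1

    @staticmethod
    def backward(ctx, dy):
        return "not a tensor"  # non-Tensor return hits the new else branch

x = paddle.randn([3, 2])
x.stop_gradient = False
y = BadGrad.apply(x)
try:
    y.mean().backward()
except ValueError as e:
    # e.g. "The output of `PyLayer.backward` should be `Tensor`, but received `str`"
    print(e)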

python/paddle/fluid/tests/unittests/test_pylayer_op.py

Lines changed: 128 additions & 0 deletions
@@ -21,6 +21,11 @@
 from paddle.autograd import PyLayer
 
 
+class FakeTensor(paddle.fluid.core.VarBase):
+    def __init__(self):
+        pass
+
+
 class TestPyLayer(unittest.TestCase):
     def test_simple_pylayer_multiple_output(self):
         class tanh(PyLayer):
@@ -426,6 +431,129 @@ def backward(ctx, dy):
         z = paddle.tanh(data)
         z = cus_tanh.apply(data)
 
+    def test_return_to_tensor(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1):
+                y1 = paddle.tanh(x1)
+                ctx.save_for_backward(y1)
+                tensor_1 = paddle.to_tensor([1, 2], dtype='float32')
+                return y1, 5, None, "helloworld", tensor_1
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                y1, = ctx.saved_tensor()
+                re1 = dy1 * (1 - paddle.square(y1))
+                return dy1
+
+        input1 = paddle.randn([2, 3]).astype("float32")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z, number, none_item, string_item, tensor1 = Tanh.apply(x1=input1)
+        z.mean().backward()
+
+
+class TestPyLayerReturnType(unittest.TestCase):
+    def test_forward_args_fake_tensor(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1):
+                y1 = FakeTensor()
+                return y1, x1
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return dy1
+
+        input1 = FakeTensor()
+
+        with self.assertRaises(ValueError):
+            y1, y2 = Tanh.apply(input1)
+
+    def test_forward_kwargs_fake_tensor(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1):
+
+                return x1
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return dy1
+
+        input1 = FakeTensor()
+
+        with self.assertRaises(ValueError):
+            y = Tanh.apply(x1=input1)
+
+    def test_forward_return_fake_tensor(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1):
+
+                return FakeTensor()
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return dy1
+
+        input1 = paddle.randn([3, 2])
+
+        with self.assertRaises(ValueError):
+            y = Tanh.apply(x1=input1)
+
+    def test_forward_return_fake_tensor_tuple(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1):
+
+                return FakeTensor(), FakeTensor()
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return dy1
+
+        input1 = paddle.randn([3, 2])
+
+        with self.assertRaises(ValueError):
+            y = Tanh.apply(x1=input1)
+
+    def test_backward_return_fake_tensor_tuple(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 + 1, x1 + 2
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+
+                return FakeTensor(), 2
+
+        input1 = paddle.randn([3, 2])
+        input1.stop_gradient = False
+        y, _ = Tanh.apply(input1, 1 + input1)
+
+        with self.assertRaises(ValueError):
+            y.mean().backward()
+
+    def test_backward_return_fake_tensor(self):
+        class Tanh(PyLayer):
+            @staticmethod
+            def forward(ctx, x1):
+                return x1 + 1, x1 + 2
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return FakeTensor()
+
+        input1 = paddle.randn([3, 2])
+        input1.stop_gradient = False
+        y, _ = Tanh.apply(input1)
+
+        with self.assertRaises(ValueError):
+            y.mean().backward()
+
 
 if __name__ == '__main__':
     unittest.main()
